Pull ec into release branch
[pandora-kernel.git] / fs / jffs2 / readinode.c
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright (C) 2001-2003 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  * $Id: readinode.c,v 1.143 2005/11/07 11:14:41 gleixner Exp $
11  *
12  */
13
14 #include <linux/kernel.h>
15 #include <linux/sched.h>
16 #include <linux/slab.h>
17 #include <linux/fs.h>
18 #include <linux/crc32.h>
19 #include <linux/pagemap.h>
20 #include <linux/mtd/mtd.h>
21 #include <linux/compiler.h>
22 #include "nodelist.h"
23
24 /*
25  * Put a new tmp_dnode_info into the temporaty RB-tree, keeping the list in
26  * order of increasing version.
27  */
28 static void jffs2_add_tn_to_tree(struct jffs2_tmp_dnode_info *tn, struct rb_root *list)
29 {
30         struct rb_node **p = &list->rb_node;
31         struct rb_node * parent = NULL;
32         struct jffs2_tmp_dnode_info *this;
33
34         while (*p) {
35                 parent = *p;
36                 this = rb_entry(parent, struct jffs2_tmp_dnode_info, rb);
37
38                 /* There may actually be a collision here, but it doesn't
39                    actually matter. As long as the two nodes with the same
40                    version are together, it's all fine. */
41                 if (tn->version > this->version)
42                         p = &(*p)->rb_left;
43                 else
44                         p = &(*p)->rb_right;
45         }
46
47         rb_link_node(&tn->rb, parent, p);
48         rb_insert_color(&tn->rb, list);
49 }
50
51 static void jffs2_free_tmp_dnode_info_list(struct rb_root *list)
52 {
53         struct rb_node *this;
54         struct jffs2_tmp_dnode_info *tn;
55
56         this = list->rb_node;
57
58         /* Now at bottom of tree */
59         while (this) {
60                 if (this->rb_left)
61                         this = this->rb_left;
62                 else if (this->rb_right)
63                         this = this->rb_right;
64                 else {
65                         tn = rb_entry(this, struct jffs2_tmp_dnode_info, rb);
66                         jffs2_free_full_dnode(tn->fn);
67                         jffs2_free_tmp_dnode_info(tn);
68
69                         this = rb_parent(this);
70                         if (!this)
71                                 break;
72
73                         if (this->rb_left == &tn->rb)
74                                 this->rb_left = NULL;
75                         else if (this->rb_right == &tn->rb)
76                                 this->rb_right = NULL;
77                         else BUG();
78                 }
79         }
80         list->rb_node = NULL;
81 }
82
83 static void jffs2_free_full_dirent_list(struct jffs2_full_dirent *fd)
84 {
85         struct jffs2_full_dirent *next;
86
87         while (fd) {
88                 next = fd->next;
89                 jffs2_free_full_dirent(fd);
90                 fd = next;
91         }
92 }
93
94 /* Returns first valid node after 'ref'. May return 'ref' */
95 static struct jffs2_raw_node_ref *jffs2_first_valid_node(struct jffs2_raw_node_ref *ref)
96 {
97         while (ref && ref->next_in_ino) {
98                 if (!ref_obsolete(ref))
99                         return ref;
100                 dbg_noderef("node at 0x%08x is obsoleted. Ignoring.\n", ref_offset(ref));
101                 ref = ref->next_in_ino;
102         }
103         return NULL;
104 }
105
106 /*
107  * Helper function for jffs2_get_inode_nodes().
108  * It is called every time an directory entry node is found.
109  *
110  * Returns: 0 on succes;
111  *          1 if the node should be marked obsolete;
112  *          negative error code on failure.
113  */
114 static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
115                                 struct jffs2_raw_dirent *rd, size_t read, struct jffs2_full_dirent **fdp,
116                                 uint32_t *latest_mctime, uint32_t *mctime_ver)
117 {
118         struct jffs2_full_dirent *fd;
119         uint32_t crc;
120
121         /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
122         BUG_ON(ref_obsolete(ref));
123
124         crc = crc32(0, rd, sizeof(*rd) - 8);
125         if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
126                 JFFS2_NOTICE("header CRC failed on dirent node at %#08x: read %#08x, calculated %#08x\n",
127                              ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
128                 return 1;
129         }
130
131         /* If we've never checked the CRCs on this node, check them now */
132         if (ref_flags(ref) == REF_UNCHECKED) {
133                 struct jffs2_eraseblock *jeb;
134                 int len;
135
136                 /* Sanity check */
137                 if (unlikely(PAD((rd->nsize + sizeof(*rd))) != PAD(je32_to_cpu(rd->totlen)))) {
138                         JFFS2_ERROR("illegal nsize in node at %#08x: nsize %#02x, totlen %#04x\n",
139                                     ref_offset(ref), rd->nsize, je32_to_cpu(rd->totlen));
140                         return 1;
141                 }
142
143                 jeb = &c->blocks[ref->flash_offset / c->sector_size];
144                 len = ref_totlen(c, jeb, ref);
145
146                 spin_lock(&c->erase_completion_lock);
147                 jeb->used_size += len;
148                 jeb->unchecked_size -= len;
149                 c->used_size += len;
150                 c->unchecked_size -= len;
151                 ref->flash_offset = ref_offset(ref) | REF_PRISTINE;
152                 spin_unlock(&c->erase_completion_lock);
153         }
154
155         fd = jffs2_alloc_full_dirent(rd->nsize + 1);
156         if (unlikely(!fd))
157                 return -ENOMEM;
158
159         fd->raw = ref;
160         fd->version = je32_to_cpu(rd->version);
161         fd->ino = je32_to_cpu(rd->ino);
162         fd->type = rd->type;
163
164         /* Pick out the mctime of the latest dirent */
165         if(fd->version > *mctime_ver && je32_to_cpu(rd->mctime)) {
166                 *mctime_ver = fd->version;
167                 *latest_mctime = je32_to_cpu(rd->mctime);
168         }
169
170         /*
171          * Copy as much of the name as possible from the raw
172          * dirent we've already read from the flash.
173          */
174         if (read > sizeof(*rd))
175                 memcpy(&fd->name[0], &rd->name[0],
176                        min_t(uint32_t, rd->nsize, (read - sizeof(*rd)) ));
177
178         /* Do we need to copy any more of the name directly from the flash? */
179         if (rd->nsize + sizeof(*rd) > read) {
180                 /* FIXME: point() */
181                 int err;
182                 int already = read - sizeof(*rd);
183
184                 err = jffs2_flash_read(c, (ref_offset(ref)) + read,
185                                 rd->nsize - already, &read, &fd->name[already]);
186                 if (unlikely(read != rd->nsize - already) && likely(!err))
187                         return -EIO;
188
189                 if (unlikely(err)) {
190                         JFFS2_ERROR("read remainder of name: error %d\n", err);
191                         jffs2_free_full_dirent(fd);
192                         return -EIO;
193                 }
194         }
195
196         fd->nhash = full_name_hash(fd->name, rd->nsize);
197         fd->next = NULL;
198         fd->name[rd->nsize] = '\0';
199
200         /*
201          * Wheee. We now have a complete jffs2_full_dirent structure, with
202          * the name in it and everything. Link it into the list
203          */
204         jffs2_add_fd_to_list(c, fd, fdp);
205
206         return 0;
207 }
208
209 /*
210  * Helper function for jffs2_get_inode_nodes().
211  * It is called every time an inode node is found.
212  *
213  * Returns: 0 on succes;
214  *          1 if the node should be marked obsolete;
215  *          negative error code on failure.
216  */
217 static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
218                              struct jffs2_raw_inode *rd, struct rb_root *tnp, int rdlen,
219                              uint32_t *latest_mctime, uint32_t *mctime_ver)
220 {
221         struct jffs2_tmp_dnode_info *tn;
222         uint32_t len, csize;
223         int ret = 1;
224         uint32_t crc;
225
226         /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
227         BUG_ON(ref_obsolete(ref));
228
229         crc = crc32(0, rd, sizeof(*rd) - 8);
230         if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
231                 JFFS2_NOTICE("node CRC failed on dnode at %#08x: read %#08x, calculated %#08x\n",
232                              ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
233                 return 1;
234         }
235
236         tn = jffs2_alloc_tmp_dnode_info();
237         if (!tn) {
238                 JFFS2_ERROR("failed to allocate tn (%zu bytes).\n", sizeof(*tn));
239                 return -ENOMEM;
240         }
241
242         tn->partial_crc = 0;
243         csize = je32_to_cpu(rd->csize);
244
245         /* If we've never checked the CRCs on this node, check them now */
246         if (ref_flags(ref) == REF_UNCHECKED) {
247
248                 /* Sanity checks */
249                 if (unlikely(je32_to_cpu(rd->offset) > je32_to_cpu(rd->isize)) ||
250                     unlikely(PAD(je32_to_cpu(rd->csize) + sizeof(*rd)) != PAD(je32_to_cpu(rd->totlen)))) {
251                                 JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref));
252                                 jffs2_dbg_dump_node(c, ref_offset(ref));
253                         goto free_out;
254                 }
255
256                 if (jffs2_is_writebuffered(c) && csize != 0) {
257                         /* At this point we are supposed to check the data CRC
258                          * of our unchecked node. But thus far, we do not
259                          * know whether the node is valid or obsolete. To
260                          * figure this out, we need to walk all the nodes of
261                          * the inode and build the inode fragtree. We don't
262                          * want to spend time checking data of nodes which may
263                          * later be found to be obsolete. So we put off the full
264                          * data CRC checking until we have read all the inode
265                          * nodes and have started building the fragtree.
266                          *
267                          * The fragtree is being built starting with nodes
268                          * having the highest version number, so we'll be able
269                          * to detect whether a node is valid (i.e., it is not
270                          * overlapped by a node with higher version) or not.
271                          * And we'll be able to check only those nodes, which
272                          * are not obsolete.
273                          *
274                          * Of course, this optimization only makes sense in case
275                          * of NAND flashes (or other flashes whith
276                          * !jffs2_can_mark_obsolete()), since on NOR flashes
277                          * nodes are marked obsolete physically.
278                          *
279                          * Since NAND flashes (or other flashes with
280                          * jffs2_is_writebuffered(c)) are anyway read by
281                          * fractions of c->wbuf_pagesize, and we have just read
282                          * the node header, it is likely that the starting part
283                          * of the node data is also read when we read the
284                          * header. So we don't mind to check the CRC of the
285                          * starting part of the data of the node now, and check
286                          * the second part later (in jffs2_check_node_data()).
287                          * Of course, we will not need to re-read and re-check
288                          * the NAND page which we have just read. This is why we
289                          * read the whole NAND page at jffs2_get_inode_nodes(),
290                          * while we needed only the node header.
291                          */
292                         unsigned char *buf;
293
294                         /* 'buf' will point to the start of data */
295                         buf = (unsigned char *)rd + sizeof(*rd);
296                         /* len will be the read data length */
297                         len = min_t(uint32_t, rdlen - sizeof(*rd), csize);
298                         tn->partial_crc = crc32(0, buf, len);
299
300                         dbg_readinode("Calculates CRC (%#08x) for %d bytes, csize %d\n", tn->partial_crc, len, csize);
301
302                         /* If we actually calculated the whole data CRC
303                          * and it is wrong, drop the node. */
304                         if (len >= csize && unlikely(tn->partial_crc != je32_to_cpu(rd->data_crc))) {
305                                 JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n",
306                                         ref_offset(ref), tn->partial_crc, je32_to_cpu(rd->data_crc));
307                                 goto free_out;
308                         }
309
310                 } else if (csize == 0) {
311                         /*
312                          * We checked the header CRC. If the node has no data, adjust
313                          * the space accounting now. For other nodes this will be done
314                          * later either when the node is marked obsolete or when its
315                          * data is checked.
316                          */
317                         struct jffs2_eraseblock *jeb;
318
319                         dbg_readinode("the node has no data.\n");
320                         jeb = &c->blocks[ref->flash_offset / c->sector_size];
321                         len = ref_totlen(c, jeb, ref);
322
323                         spin_lock(&c->erase_completion_lock);
324                         jeb->used_size += len;
325                         jeb->unchecked_size -= len;
326                         c->used_size += len;
327                         c->unchecked_size -= len;
328                         ref->flash_offset = ref_offset(ref) | REF_NORMAL;
329                         spin_unlock(&c->erase_completion_lock);
330                 }
331         }
332
333         tn->fn = jffs2_alloc_full_dnode();
334         if (!tn->fn) {
335                 JFFS2_ERROR("alloc fn failed\n");
336                 ret = -ENOMEM;
337                 goto free_out;
338         }
339
340         tn->version = je32_to_cpu(rd->version);
341         tn->fn->ofs = je32_to_cpu(rd->offset);
342         tn->data_crc = je32_to_cpu(rd->data_crc);
343         tn->csize = csize;
344         tn->fn->raw = ref;
345
346         /* There was a bug where we wrote hole nodes out with
347            csize/dsize swapped. Deal with it */
348         if (rd->compr == JFFS2_COMPR_ZERO && !je32_to_cpu(rd->dsize) && csize)
349                 tn->fn->size = csize;
350         else // normal case...
351                 tn->fn->size = je32_to_cpu(rd->dsize);
352
353         dbg_readinode("dnode @%08x: ver %u, offset %#04x, dsize %#04x, csize %#04x\n",
354                   ref_offset(ref), je32_to_cpu(rd->version), je32_to_cpu(rd->offset), je32_to_cpu(rd->dsize), csize);
355
356         jffs2_add_tn_to_tree(tn, tnp);
357
358         return 0;
359
360 free_out:
361         jffs2_free_tmp_dnode_info(tn);
362         return ret;
363 }
364
365 /*
366  * Helper function for jffs2_get_inode_nodes().
367  * It is called every time an unknown node is found.
368  *
369  * Returns: 0 on success;
370  *          1 if the node should be marked obsolete;
371  *          negative error code on failure.
372  */
373 static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, struct jffs2_unknown_node *un)
374 {
375         /* We don't mark unknown nodes as REF_UNCHECKED */
376         if (ref_flags(ref) == REF_UNCHECKED) {
377                 JFFS2_ERROR("REF_UNCHECKED but unknown node at %#08x\n",
378                             ref_offset(ref));
379                 JFFS2_ERROR("Node is {%04x,%04x,%08x,%08x}. Please report this error.\n",
380                             je16_to_cpu(un->magic), je16_to_cpu(un->nodetype),
381                             je32_to_cpu(un->totlen), je32_to_cpu(un->hdr_crc));
382                 return 1;
383         }
384
385         un->nodetype = cpu_to_je16(JFFS2_NODE_ACCURATE | je16_to_cpu(un->nodetype));
386
387         switch(je16_to_cpu(un->nodetype) & JFFS2_COMPAT_MASK) {
388
389         case JFFS2_FEATURE_INCOMPAT:
390                 JFFS2_ERROR("unknown INCOMPAT nodetype %#04X at %#08x\n",
391                             je16_to_cpu(un->nodetype), ref_offset(ref));
392                 /* EEP */
393                 BUG();
394                 break;
395
396         case JFFS2_FEATURE_ROCOMPAT:
397                 JFFS2_ERROR("unknown ROCOMPAT nodetype %#04X at %#08x\n",
398                             je16_to_cpu(un->nodetype), ref_offset(ref));
399                 BUG_ON(!(c->flags & JFFS2_SB_FLAG_RO));
400                 break;
401
402         case JFFS2_FEATURE_RWCOMPAT_COPY:
403                 JFFS2_NOTICE("unknown RWCOMPAT_COPY nodetype %#04X at %#08x\n",
404                              je16_to_cpu(un->nodetype), ref_offset(ref));
405                 break;
406
407         case JFFS2_FEATURE_RWCOMPAT_DELETE:
408                 JFFS2_NOTICE("unknown RWCOMPAT_DELETE nodetype %#04X at %#08x\n",
409                              je16_to_cpu(un->nodetype), ref_offset(ref));
410                 return 1;
411         }
412
413         return 0;
414 }
415
416 /*
417  * Helper function for jffs2_get_inode_nodes().
418  * The function detects whether more data should be read and reads it if yes.
419  *
420  * Returns: 0 on succes;
421  *          negative error code on failure.
422  */
423 static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
424                      int right_size, int *rdlen, unsigned char *buf, unsigned char *bufstart)
425 {
426         int right_len, err, len;
427         size_t retlen;
428         uint32_t offs;
429
430         if (jffs2_is_writebuffered(c)) {
431                 right_len = c->wbuf_pagesize - (bufstart - buf);
432                 if (right_size + (int)(bufstart - buf) > c->wbuf_pagesize)
433                         right_len += c->wbuf_pagesize;
434         } else
435                 right_len = right_size;
436
437         if (*rdlen == right_len)
438                 return 0;
439
440         /* We need to read more data */
441         offs = ref_offset(ref) + *rdlen;
442         if (jffs2_is_writebuffered(c)) {
443                 bufstart = buf + c->wbuf_pagesize;
444                 len = c->wbuf_pagesize;
445         } else {
446                 bufstart = buf + *rdlen;
447                 len = right_size - *rdlen;
448         }
449
450         dbg_readinode("read more %d bytes\n", len);
451
452         err = jffs2_flash_read(c, offs, len, &retlen, bufstart);
453         if (err) {
454                 JFFS2_ERROR("can not read %d bytes from 0x%08x, "
455                         "error code: %d.\n", len, offs, err);
456                 return err;
457         }
458
459         if (retlen < len) {
460                 JFFS2_ERROR("short read at %#08x: %zu instead of %d.\n",
461                                 offs, retlen, len);
462                 return -EIO;
463         }
464
465         *rdlen = right_len;
466
467         return 0;
468 }
469
470 /* Get tmp_dnode_info and full_dirent for all non-obsolete nodes associated
471    with this ino, returning the former in order of version */
472 static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
473                                  struct rb_root *tnp, struct jffs2_full_dirent **fdp,
474                                  uint32_t *highest_version, uint32_t *latest_mctime,
475                                  uint32_t *mctime_ver)
476 {
477         struct jffs2_raw_node_ref *ref, *valid_ref;
478         struct rb_root ret_tn = RB_ROOT;
479         struct jffs2_full_dirent *ret_fd = NULL;
480         unsigned char *buf = NULL;
481         union jffs2_node_union *node;
482         size_t retlen;
483         int len, err;
484
485         *mctime_ver = 0;
486
487         dbg_readinode("ino #%u\n", f->inocache->ino);
488
489         if (jffs2_is_writebuffered(c)) {
490                 /*
491                  * If we have the write buffer, we assume the minimal I/O unit
492                  * is c->wbuf_pagesize. We implement some optimizations which in
493                  * this case and we need a temporary buffer of size =
494                  * 2*c->wbuf_pagesize bytes (see comments in read_dnode()).
495                  * Basically, we want to read not only the node header, but the
496                  * whole wbuf (NAND page in case of NAND) or 2, if the node
497                  * header overlaps the border between the 2 wbufs.
498                  */
499                 len = 2*c->wbuf_pagesize;
500         } else {
501                 /*
502                  * When there is no write buffer, the size of the temporary
503                  * buffer is the size of the larges node header.
504                  */
505                 len = sizeof(union jffs2_node_union);
506         }
507
508         /* FIXME: in case of NOR and available ->point() this
509          * needs to be fixed. */
510         buf = kmalloc(len, GFP_KERNEL);
511         if (!buf)
512                 return -ENOMEM;
513
514         spin_lock(&c->erase_completion_lock);
515         valid_ref = jffs2_first_valid_node(f->inocache->nodes);
516         if (!valid_ref && f->inocache->ino != 1)
517                 JFFS2_WARNING("Eep. No valid nodes for ino #%u.\n", f->inocache->ino);
518         while (valid_ref) {
519                 unsigned char *bufstart;
520
521                 /* We can hold a pointer to a non-obsolete node without the spinlock,
522                    but _obsolete_ nodes may disappear at any time, if the block
523                    they're in gets erased. So if we mark 'ref' obsolete while we're
524                    not holding the lock, it can go away immediately. For that reason,
525                    we find the next valid node first, before processing 'ref'.
526                 */
527                 ref = valid_ref;
528                 valid_ref = jffs2_first_valid_node(ref->next_in_ino);
529                 spin_unlock(&c->erase_completion_lock);
530
531                 cond_resched();
532
533                 /*
534                  * At this point we don't know the type of the node we're going
535                  * to read, so we do not know the size of its header. In order
536                  * to minimize the amount of flash IO we assume the node has
537                  * size = JFFS2_MIN_NODE_HEADER.
538                  */
539                 if (jffs2_is_writebuffered(c)) {
540                         /*
541                          * We treat 'buf' as 2 adjacent wbufs. We want to
542                          * adjust bufstart such as it points to the
543                          * beginning of the node within this wbuf.
544                          */
545                         bufstart = buf + (ref_offset(ref) % c->wbuf_pagesize);
546                         /* We will read either one wbuf or 2 wbufs. */
547                         len = c->wbuf_pagesize - (bufstart - buf);
548                         if (JFFS2_MIN_NODE_HEADER + (int)(bufstart - buf) > c->wbuf_pagesize) {
549                                 /* The header spans the border of the first wbuf */
550                                 len += c->wbuf_pagesize;
551                         }
552                 } else {
553                         bufstart = buf;
554                         len = JFFS2_MIN_NODE_HEADER;
555                 }
556
557                 dbg_readinode("read %d bytes at %#08x(%d).\n", len, ref_offset(ref), ref_flags(ref));
558
559                 /* FIXME: point() */
560                 err = jffs2_flash_read(c, ref_offset(ref), len,
561                                        &retlen, bufstart);
562                 if (err) {
563                         JFFS2_ERROR("can not read %d bytes from 0x%08x, " "error code: %d.\n", len, ref_offset(ref), err);
564                         goto free_out;
565                 }
566
567                 if (retlen < len) {
568                         JFFS2_ERROR("short read at %#08x: %zu instead of %d.\n", ref_offset(ref), retlen, len);
569                         err = -EIO;
570                         goto free_out;
571                 }
572
573                 node = (union jffs2_node_union *)bufstart;
574
575                 /* No need to mask in the valid bit; it shouldn't be invalid */
576                 if (je32_to_cpu(node->u.hdr_crc) != crc32(0, node, sizeof(node->u)-4)) {
577                         JFFS2_NOTICE("Node header CRC failed at %#08x. {%04x,%04x,%08x,%08x}\n",
578                                      ref_offset(ref), je16_to_cpu(node->u.magic),
579                                      je16_to_cpu(node->u.nodetype),
580                                      je32_to_cpu(node->u.totlen),
581                                      je32_to_cpu(node->u.hdr_crc));
582                         jffs2_dbg_dump_node(c, ref_offset(ref));
583                         jffs2_mark_node_obsolete(c, ref);
584                         goto cont;
585                 }
586                 /* Due to poor choice of crc32 seed, an all-zero node will have a correct CRC */
587                 if (!je32_to_cpu(node->u.hdr_crc) && !je16_to_cpu(node->u.nodetype) &&
588                     !je16_to_cpu(node->u.magic) && !je32_to_cpu(node->u.totlen)) {
589                         JFFS2_NOTICE("All zero node header at %#08x.\n", ref_offset(ref));
590                         jffs2_mark_node_obsolete(c, ref);
591                         goto cont;
592                 }
593
594                 switch (je16_to_cpu(node->u.nodetype)) {
595
596                 case JFFS2_NODETYPE_DIRENT:
597
598                         if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_dirent)) {
599                                 err = read_more(c, ref, sizeof(struct jffs2_raw_dirent), &len, buf, bufstart);
600                                 if (unlikely(err))
601                                         goto free_out;
602                         }
603
604                         err = read_direntry(c, ref, &node->d, retlen, &ret_fd, latest_mctime, mctime_ver);
605                         if (err == 1) {
606                                 jffs2_mark_node_obsolete(c, ref);
607                                 break;
608                         } else if (unlikely(err))
609                                 goto free_out;
610
611                         if (je32_to_cpu(node->d.version) > *highest_version)
612                                 *highest_version = je32_to_cpu(node->d.version);
613
614                         break;
615
616                 case JFFS2_NODETYPE_INODE:
617
618                         if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_inode)) {
619                                 err = read_more(c, ref, sizeof(struct jffs2_raw_inode), &len, buf, bufstart);
620                                 if (unlikely(err))
621                                         goto free_out;
622                         }
623
624                         err = read_dnode(c, ref, &node->i, &ret_tn, len, latest_mctime, mctime_ver);
625                         if (err == 1) {
626                                 jffs2_mark_node_obsolete(c, ref);
627                                 break;
628                         } else if (unlikely(err))
629                                 goto free_out;
630
631                         if (je32_to_cpu(node->i.version) > *highest_version)
632                                 *highest_version = je32_to_cpu(node->i.version);
633
634                         break;
635
636                 default:
637                         if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_unknown_node)) {
638                                 err = read_more(c, ref, sizeof(struct jffs2_unknown_node), &len, buf, bufstart);
639                                 if (unlikely(err))
640                                         goto free_out;
641                         }
642
643                         err = read_unknown(c, ref, &node->u);
644                         if (err == 1) {
645                                 jffs2_mark_node_obsolete(c, ref);
646                                 break;
647                         } else if (unlikely(err))
648                                 goto free_out;
649
650                 }
651         cont:
652                 spin_lock(&c->erase_completion_lock);
653         }
654
655         spin_unlock(&c->erase_completion_lock);
656         *tnp = ret_tn;
657         *fdp = ret_fd;
658         kfree(buf);
659
660         dbg_readinode("nodes of inode #%u were read, the highest version is %u, latest_mctime %u, mctime_ver %u.\n",
661                         f->inocache->ino, *highest_version, *latest_mctime, *mctime_ver);
662         return 0;
663
664  free_out:
665         jffs2_free_tmp_dnode_info_list(&ret_tn);
666         jffs2_free_full_dirent_list(ret_fd);
667         kfree(buf);
668         return err;
669 }
670
671 static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
672                                         struct jffs2_inode_info *f,
673                                         struct jffs2_raw_inode *latest_node)
674 {
675         struct jffs2_tmp_dnode_info *tn;
676         struct rb_root tn_list;
677         struct rb_node *rb, *repl_rb;
678         struct jffs2_full_dirent *fd_list;
679         struct jffs2_full_dnode *fn, *first_fn = NULL;
680         uint32_t crc;
681         uint32_t latest_mctime, mctime_ver;
682         size_t retlen;
683         int ret;
684
685         dbg_readinode("ino #%u nlink is %d\n", f->inocache->ino, f->inocache->nlink);
686
687         /* Grab all nodes relevant to this ino */
688         ret = jffs2_get_inode_nodes(c, f, &tn_list, &fd_list, &f->highest_version, &latest_mctime, &mctime_ver);
689
690         if (ret) {
691                 JFFS2_ERROR("cannot read nodes for ino %u, returned error is %d\n", f->inocache->ino, ret);
692                 if (f->inocache->state == INO_STATE_READING)
693                         jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
694                 return ret;
695         }
696         f->dents = fd_list;
697
698         rb = rb_first(&tn_list);
699
700         while (rb) {
701                 cond_resched();
702                 tn = rb_entry(rb, struct jffs2_tmp_dnode_info, rb);
703                 fn = tn->fn;
704                 ret = 1;
705                 dbg_readinode("consider node ver %u, phys offset "
706                         "%#08x(%d), range %u-%u.\n", tn->version,
707                         ref_offset(fn->raw), ref_flags(fn->raw),
708                         fn->ofs, fn->ofs + fn->size);
709
710                 if (fn->size) {
711                         ret = jffs2_add_older_frag_to_fragtree(c, f, tn);
712                         /* TODO: the error code isn't checked, check it */
713                         jffs2_dbg_fragtree_paranoia_check_nolock(f);
714                         BUG_ON(ret < 0);
715                         if (!first_fn && ret == 0)
716                                 first_fn = fn;
717                 } else if (!first_fn) {
718                         first_fn = fn;
719                         f->metadata = fn;
720                         ret = 0; /* Prevent freeing the metadata update node */
721                 } else
722                         jffs2_mark_node_obsolete(c, fn->raw);
723
724                 BUG_ON(rb->rb_left);
725                 if (rb_parent(rb) && rb_parent(rb)->rb_left == rb) {
726                         /* We were then left-hand child of our parent. We need
727                          * to move our own right-hand child into our place. */
728                         repl_rb = rb->rb_right;
729                         if (repl_rb)
730                                 rb_set_parent(repl_rb, rb_parent(rb));
731                 } else
732                         repl_rb = NULL;
733
734                 rb = rb_next(rb);
735
736                 /* Remove the spent tn from the tree; don't bother rebalancing
737                  * but put our right-hand child in our own place. */
738                 if (rb_parent(&tn->rb)) {
739                         if (rb_parent(&tn->rb)->rb_left == &tn->rb)
740                                 rb_parent(&tn->rb)->rb_left = repl_rb;
741                         else if (rb_parent(&tn->rb)->rb_right == &tn->rb)
742                                 rb_parent(&tn->rb)->rb_right = repl_rb;
743                         else BUG();
744                 } else if (tn->rb.rb_right)
745                         rb_set_parent(tn->rb.rb_right, NULL);
746
747                 jffs2_free_tmp_dnode_info(tn);
748                 if (ret) {
749                         dbg_readinode("delete dnode %u-%u.\n",
750                                 fn->ofs, fn->ofs + fn->size);
751                         jffs2_free_full_dnode(fn);
752                 }
753         }
754         jffs2_dbg_fragtree_paranoia_check_nolock(f);
755
756         BUG_ON(first_fn && ref_obsolete(first_fn->raw));
757
758         fn = first_fn;
759         if (unlikely(!first_fn)) {
760                 /* No data nodes for this inode. */
761                 if (f->inocache->ino != 1) {
762                         JFFS2_WARNING("no data nodes found for ino #%u\n", f->inocache->ino);
763                         if (!fd_list) {
764                                 if (f->inocache->state == INO_STATE_READING)
765                                         jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
766                                 return -EIO;
767                         }
768                         JFFS2_NOTICE("but it has children so we fake some modes for it\n");
769                 }
770                 latest_node->mode = cpu_to_jemode(S_IFDIR|S_IRUGO|S_IWUSR|S_IXUGO);
771                 latest_node->version = cpu_to_je32(0);
772                 latest_node->atime = latest_node->ctime = latest_node->mtime = cpu_to_je32(0);
773                 latest_node->isize = cpu_to_je32(0);
774                 latest_node->gid = cpu_to_je16(0);
775                 latest_node->uid = cpu_to_je16(0);
776                 if (f->inocache->state == INO_STATE_READING)
777                         jffs2_set_inocache_state(c, f->inocache, INO_STATE_PRESENT);
778                 return 0;
779         }
780
781         ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(*latest_node), &retlen, (void *)latest_node);
782         if (ret || retlen != sizeof(*latest_node)) {
783                 JFFS2_ERROR("failed to read from flash: error %d, %zd of %zd bytes read\n",
784                         ret, retlen, sizeof(*latest_node));
785                 /* FIXME: If this fails, there seems to be a memory leak. Find it. */
786                 up(&f->sem);
787                 jffs2_do_clear_inode(c, f);
788                 return ret?ret:-EIO;
789         }
790
791         crc = crc32(0, latest_node, sizeof(*latest_node)-8);
792         if (crc != je32_to_cpu(latest_node->node_crc)) {
793                 JFFS2_ERROR("CRC failed for read_inode of inode %u at physical location 0x%x\n",
794                         f->inocache->ino, ref_offset(fn->raw));
795                 up(&f->sem);
796                 jffs2_do_clear_inode(c, f);
797                 return -EIO;
798         }
799
800         switch(jemode_to_cpu(latest_node->mode) & S_IFMT) {
801         case S_IFDIR:
802                 if (mctime_ver > je32_to_cpu(latest_node->version)) {
803                         /* The times in the latest_node are actually older than
804                            mctime in the latest dirent. Cheat. */
805                         latest_node->ctime = latest_node->mtime = cpu_to_je32(latest_mctime);
806                 }
807                 break;
808
809
810         case S_IFREG:
811                 /* If it was a regular file, truncate it to the latest node's isize */
812                 jffs2_truncate_fragtree(c, &f->fragtree, je32_to_cpu(latest_node->isize));
813                 break;
814
815         case S_IFLNK:
816                 /* Hack to work around broken isize in old symlink code.
817                    Remove this when dwmw2 comes to his senses and stops
818                    symlinks from being an entirely gratuitous special
819                    case. */
820                 if (!je32_to_cpu(latest_node->isize))
821                         latest_node->isize = latest_node->dsize;
822
823                 if (f->inocache->state != INO_STATE_CHECKING) {
824                         /* Symlink's inode data is the target path. Read it and
825                          * keep in RAM to facilitate quick follow symlink
826                          * operation. */
827                         f->target = kmalloc(je32_to_cpu(latest_node->csize) + 1, GFP_KERNEL);
828                         if (!f->target) {
829                                 JFFS2_ERROR("can't allocate %d bytes of memory for the symlink target path cache\n", je32_to_cpu(latest_node->csize));
830                                 up(&f->sem);
831                                 jffs2_do_clear_inode(c, f);
832                                 return -ENOMEM;
833                         }
834
835                         ret = jffs2_flash_read(c, ref_offset(fn->raw) + sizeof(*latest_node),
836                                                 je32_to_cpu(latest_node->csize), &retlen, (char *)f->target);
837
838                         if (ret  || retlen != je32_to_cpu(latest_node->csize)) {
839                                 if (retlen != je32_to_cpu(latest_node->csize))
840                                         ret = -EIO;
841                                 kfree(f->target);
842                                 f->target = NULL;
843                                 up(&f->sem);
844                                 jffs2_do_clear_inode(c, f);
845                                 return -ret;
846                         }
847
848                         f->target[je32_to_cpu(latest_node->csize)] = '\0';
849                         dbg_readinode("symlink's target '%s' cached\n", f->target);
850                 }
851
852                 /* fall through... */
853
854         case S_IFBLK:
855         case S_IFCHR:
856                 /* Certain inode types should have only one data node, and it's
857                    kept as the metadata node */
858                 if (f->metadata) {
859                         JFFS2_ERROR("Argh. Special inode #%u with mode 0%o had metadata node\n",
860                                f->inocache->ino, jemode_to_cpu(latest_node->mode));
861                         up(&f->sem);
862                         jffs2_do_clear_inode(c, f);
863                         return -EIO;
864                 }
865                 if (!frag_first(&f->fragtree)) {
866                         JFFS2_ERROR("Argh. Special inode #%u with mode 0%o has no fragments\n",
867                                f->inocache->ino, jemode_to_cpu(latest_node->mode));
868                         up(&f->sem);
869                         jffs2_do_clear_inode(c, f);
870                         return -EIO;
871                 }
872                 /* ASSERT: f->fraglist != NULL */
873                 if (frag_next(frag_first(&f->fragtree))) {
874                         JFFS2_ERROR("Argh. Special inode #%u with mode 0x%x had more than one node\n",
875                                f->inocache->ino, jemode_to_cpu(latest_node->mode));
876                         /* FIXME: Deal with it - check crc32, check for duplicate node, check times and discard the older one */
877                         up(&f->sem);
878                         jffs2_do_clear_inode(c, f);
879                         return -EIO;
880                 }
881                 /* OK. We're happy */
882                 f->metadata = frag_first(&f->fragtree)->node;
883                 jffs2_free_node_frag(frag_first(&f->fragtree));
884                 f->fragtree = RB_ROOT;
885                 break;
886         }
887         if (f->inocache->state == INO_STATE_READING)
888                 jffs2_set_inocache_state(c, f->inocache, INO_STATE_PRESENT);
889
890         return 0;
891 }
892
893 /* Scan the list of all nodes present for this ino, build map of versions, etc. */
894 int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
895                         uint32_t ino, struct jffs2_raw_inode *latest_node)
896 {
897         dbg_readinode("read inode #%u\n", ino);
898
899  retry_inocache:
900         spin_lock(&c->inocache_lock);
901         f->inocache = jffs2_get_ino_cache(c, ino);
902
903         if (f->inocache) {
904                 /* Check its state. We may need to wait before we can use it */
905                 switch(f->inocache->state) {
906                 case INO_STATE_UNCHECKED:
907                 case INO_STATE_CHECKEDABSENT:
908                         f->inocache->state = INO_STATE_READING;
909                         break;
910
911                 case INO_STATE_CHECKING:
912                 case INO_STATE_GC:
913                         /* If it's in either of these states, we need
914                            to wait for whoever's got it to finish and
915                            put it back. */
916                         dbg_readinode("waiting for ino #%u in state %d\n", ino, f->inocache->state);
917                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
918                         goto retry_inocache;
919
920                 case INO_STATE_READING:
921                 case INO_STATE_PRESENT:
922                         /* Eep. This should never happen. It can
923                         happen if Linux calls read_inode() again
924                         before clear_inode() has finished though. */
925                         JFFS2_ERROR("Eep. Trying to read_inode #%u when it's already in state %d!\n", ino, f->inocache->state);
926                         /* Fail. That's probably better than allowing it to succeed */
927                         f->inocache = NULL;
928                         break;
929
930                 default:
931                         BUG();
932                 }
933         }
934         spin_unlock(&c->inocache_lock);
935
936         if (!f->inocache && ino == 1) {
937                 /* Special case - no root inode on medium */
938                 f->inocache = jffs2_alloc_inode_cache();
939                 if (!f->inocache) {
940                         JFFS2_ERROR("cannot allocate inocache for root inode\n");
941                         return -ENOMEM;
942                 }
943                 dbg_readinode("creating inocache for root inode\n");
944                 memset(f->inocache, 0, sizeof(struct jffs2_inode_cache));
945                 f->inocache->ino = f->inocache->nlink = 1;
946                 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
947                 f->inocache->state = INO_STATE_READING;
948                 jffs2_add_ino_cache(c, f->inocache);
949         }
950         if (!f->inocache) {
951                 JFFS2_ERROR("requestied to read an nonexistent ino %u\n", ino);
952                 return -ENOENT;
953         }
954
955         return jffs2_do_read_inode_internal(c, f, latest_node);
956 }
957
958 int jffs2_do_crccheck_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
959 {
960         struct jffs2_raw_inode n;
961         struct jffs2_inode_info *f = kzalloc(sizeof(*f), GFP_KERNEL);
962         int ret;
963
964         if (!f)
965                 return -ENOMEM;
966
967         init_MUTEX_LOCKED(&f->sem);
968         f->inocache = ic;
969
970         ret = jffs2_do_read_inode_internal(c, f, &n);
971         if (!ret) {
972                 up(&f->sem);
973                 jffs2_do_clear_inode(c, f);
974         }
975         kfree (f);
976         return ret;
977 }
978
979 void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
980 {
981         struct jffs2_full_dirent *fd, *fds;
982         int deleted;
983
984         jffs2_clear_acl(f);
985         jffs2_xattr_delete_inode(c, f->inocache);
986         down(&f->sem);
987         deleted = f->inocache && !f->inocache->nlink;
988
989         if (f->inocache && f->inocache->state != INO_STATE_CHECKING)
990                 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING);
991
992         if (f->metadata) {
993                 if (deleted)
994                         jffs2_mark_node_obsolete(c, f->metadata->raw);
995                 jffs2_free_full_dnode(f->metadata);
996         }
997
998         jffs2_kill_fragtree(&f->fragtree, deleted?c:NULL);
999
1000         if (f->target) {
1001                 kfree(f->target);
1002                 f->target = NULL;
1003         }
1004
1005         fds = f->dents;
1006         while(fds) {
1007                 fd = fds;
1008                 fds = fd->next;
1009                 jffs2_free_full_dirent(fd);
1010         }
1011
1012         if (f->inocache && f->inocache->state != INO_STATE_CHECKING) {
1013                 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
1014                 if (f->inocache->nodes == (void *)f->inocache)
1015                         jffs2_del_ino_cache(c, f->inocache);
1016         }
1017
1018         up(&f->sem);
1019 }