Merge master.kernel.org:/pub/scm/linux/kernel/git/davej/agpgart
[pandora-kernel.git] / fs / jffs2 / readinode.c
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright (C) 2001-2003 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  * $Id: readinode.c,v 1.143 2005/11/07 11:14:41 gleixner Exp $
11  *
12  */
13
14 #include <linux/kernel.h>
15 #include <linux/sched.h>
16 #include <linux/slab.h>
17 #include <linux/fs.h>
18 #include <linux/crc32.h>
19 #include <linux/pagemap.h>
20 #include <linux/mtd/mtd.h>
21 #include <linux/compiler.h>
22 #include "nodelist.h"
23
24 /*
25  * Put a new tmp_dnode_info into the temporaty RB-tree, keeping the list in
26  * order of increasing version.
27  */
28 static void jffs2_add_tn_to_tree(struct jffs2_tmp_dnode_info *tn, struct rb_root *list)
29 {
30         struct rb_node **p = &list->rb_node;
31         struct rb_node * parent = NULL;
32         struct jffs2_tmp_dnode_info *this;
33
34         while (*p) {
35                 parent = *p;
36                 this = rb_entry(parent, struct jffs2_tmp_dnode_info, rb);
37
38                 /* There may actually be a collision here, but it doesn't
39                    actually matter. As long as the two nodes with the same
40                    version are together, it's all fine. */
41                 if (tn->version > this->version)
42                         p = &(*p)->rb_left;
43                 else
44                         p = &(*p)->rb_right;
45         }
46
47         rb_link_node(&tn->rb, parent, p);
48         rb_insert_color(&tn->rb, list);
49 }
50
51 static void jffs2_free_tmp_dnode_info_list(struct rb_root *list)
52 {
53         struct rb_node *this;
54         struct jffs2_tmp_dnode_info *tn;
55
56         this = list->rb_node;
57
58         /* Now at bottom of tree */
59         while (this) {
60                 if (this->rb_left)
61                         this = this->rb_left;
62                 else if (this->rb_right)
63                         this = this->rb_right;
64                 else {
65                         tn = rb_entry(this, struct jffs2_tmp_dnode_info, rb);
66                         jffs2_free_full_dnode(tn->fn);
67                         jffs2_free_tmp_dnode_info(tn);
68
69                         this = this->rb_parent;
70                         if (!this)
71                                 break;
72
73                         if (this->rb_left == &tn->rb)
74                                 this->rb_left = NULL;
75                         else if (this->rb_right == &tn->rb)
76                                 this->rb_right = NULL;
77                         else BUG();
78                 }
79         }
80         list->rb_node = NULL;
81 }
82
83 static void jffs2_free_full_dirent_list(struct jffs2_full_dirent *fd)
84 {
85         struct jffs2_full_dirent *next;
86
87         while (fd) {
88                 next = fd->next;
89                 jffs2_free_full_dirent(fd);
90                 fd = next;
91         }
92 }
93
94 /* Returns first valid node after 'ref'. May return 'ref' */
95 static struct jffs2_raw_node_ref *jffs2_first_valid_node(struct jffs2_raw_node_ref *ref)
96 {
97         while (ref && ref->next_in_ino) {
98                 if (!ref_obsolete(ref))
99                         return ref;
100                 dbg_noderef("node at 0x%08x is obsoleted. Ignoring.\n", ref_offset(ref));
101                 ref = ref->next_in_ino;
102         }
103         return NULL;
104 }
105
106 /*
107  * Helper function for jffs2_get_inode_nodes().
108  * It is called every time an directory entry node is found.
109  *
110  * Returns: 0 on succes;
111  *          1 if the node should be marked obsolete;
112  *          negative error code on failure.
113  */
114 static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
115                                 struct jffs2_raw_dirent *rd, size_t read, struct jffs2_full_dirent **fdp,
116                                 uint32_t *latest_mctime, uint32_t *mctime_ver)
117 {
118         struct jffs2_full_dirent *fd;
119
120         /* The direntry nodes are checked during the flash scanning */
121         BUG_ON(ref_flags(ref) == REF_UNCHECKED);
122         /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
123         BUG_ON(ref_obsolete(ref));
124
125         /* Sanity check */
126         if (unlikely(PAD((rd->nsize + sizeof(*rd))) != PAD(je32_to_cpu(rd->totlen)))) {
127                 JFFS2_ERROR("illegal nsize in node at %#08x: nsize %#02x, totlen %#04x\n",
128                        ref_offset(ref), rd->nsize, je32_to_cpu(rd->totlen));
129                 return 1;
130         }
131
132         fd = jffs2_alloc_full_dirent(rd->nsize + 1);
133         if (unlikely(!fd))
134                 return -ENOMEM;
135
136         fd->raw = ref;
137         fd->version = je32_to_cpu(rd->version);
138         fd->ino = je32_to_cpu(rd->ino);
139         fd->type = rd->type;
140
141         /* Pick out the mctime of the latest dirent */
142         if(fd->version > *mctime_ver && je32_to_cpu(rd->mctime)) {
143                 *mctime_ver = fd->version;
144                 *latest_mctime = je32_to_cpu(rd->mctime);
145         }
146
147         /*
148          * Copy as much of the name as possible from the raw
149          * dirent we've already read from the flash.
150          */
151         if (read > sizeof(*rd))
152                 memcpy(&fd->name[0], &rd->name[0],
153                        min_t(uint32_t, rd->nsize, (read - sizeof(*rd)) ));
154
155         /* Do we need to copy any more of the name directly from the flash? */
156         if (rd->nsize + sizeof(*rd) > read) {
157                 /* FIXME: point() */
158                 int err;
159                 int already = read - sizeof(*rd);
160
161                 err = jffs2_flash_read(c, (ref_offset(ref)) + read,
162                                 rd->nsize - already, &read, &fd->name[already]);
163                 if (unlikely(read != rd->nsize - already) && likely(!err))
164                         return -EIO;
165
166                 if (unlikely(err)) {
167                         JFFS2_ERROR("read remainder of name: error %d\n", err);
168                         jffs2_free_full_dirent(fd);
169                         return -EIO;
170                 }
171         }
172
173         fd->nhash = full_name_hash(fd->name, rd->nsize);
174         fd->next = NULL;
175         fd->name[rd->nsize] = '\0';
176
177         /*
178          * Wheee. We now have a complete jffs2_full_dirent structure, with
179          * the name in it and everything. Link it into the list
180          */
181         jffs2_add_fd_to_list(c, fd, fdp);
182
183         return 0;
184 }
185
186 /*
187  * Helper function for jffs2_get_inode_nodes().
188  * It is called every time an inode node is found.
189  *
190  * Returns: 0 on succes;
191  *          1 if the node should be marked obsolete;
192  *          negative error code on failure.
193  */
194 static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
195                              struct jffs2_raw_inode *rd, struct rb_root *tnp, int rdlen,
196                              uint32_t *latest_mctime, uint32_t *mctime_ver)
197 {
198         struct jffs2_tmp_dnode_info *tn;
199         uint32_t len, csize;
200         int ret = 1;
201
202         /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
203         BUG_ON(ref_obsolete(ref));
204
205         tn = jffs2_alloc_tmp_dnode_info();
206         if (!tn) {
207                 JFFS2_ERROR("failed to allocate tn (%d bytes).\n", sizeof(*tn));
208                 return -ENOMEM;
209         }
210
211         tn->partial_crc = 0;
212         csize = je32_to_cpu(rd->csize);
213
214         /* If we've never checked the CRCs on this node, check them now */
215         if (ref_flags(ref) == REF_UNCHECKED) {
216                 uint32_t crc;
217
218                 crc = crc32(0, rd, sizeof(*rd) - 8);
219                 if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
220                         JFFS2_NOTICE("header CRC failed on node at %#08x: read %#08x, calculated %#08x\n",
221                                         ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
222                         goto free_out;
223                 }
224
225                 /* Sanity checks */
226                 if (unlikely(je32_to_cpu(rd->offset) > je32_to_cpu(rd->isize)) ||
227                     unlikely(PAD(je32_to_cpu(rd->csize) + sizeof(*rd)) != PAD(je32_to_cpu(rd->totlen)))) {
228                                 JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref));
229                                 jffs2_dbg_dump_node(c, ref_offset(ref));
230                         goto free_out;
231                 }
232
233                 if (jffs2_is_writebuffered(c) && csize != 0) {
234                         /* At this point we are supposed to check the data CRC
235                          * of our unchecked node. But thus far, we do not
236                          * know whether the node is valid or obsolete. To
237                          * figure this out, we need to walk all the nodes of
238                          * the inode and build the inode fragtree. We don't
239                          * want to spend time checking data of nodes which may
240                          * later be found to be obsolete. So we put off the full
241                          * data CRC checking until we have read all the inode
242                          * nodes and have started building the fragtree.
243                          *
244                          * The fragtree is being built starting with nodes
245                          * having the highest version number, so we'll be able
246                          * to detect whether a node is valid (i.e., it is not
247                          * overlapped by a node with higher version) or not.
248                          * And we'll be able to check only those nodes, which
249                          * are not obsolete.
250                          *
251                          * Of course, this optimization only makes sense in case
252                          * of NAND flashes (or other flashes whith
253                          * !jffs2_can_mark_obsolete()), since on NOR flashes
254                          * nodes are marked obsolete physically.
255                          *
256                          * Since NAND flashes (or other flashes with
257                          * jffs2_is_writebuffered(c)) are anyway read by
258                          * fractions of c->wbuf_pagesize, and we have just read
259                          * the node header, it is likely that the starting part
260                          * of the node data is also read when we read the
261                          * header. So we don't mind to check the CRC of the
262                          * starting part of the data of the node now, and check
263                          * the second part later (in jffs2_check_node_data()).
264                          * Of course, we will not need to re-read and re-check
265                          * the NAND page which we have just read. This is why we
266                          * read the whole NAND page at jffs2_get_inode_nodes(),
267                          * while we needed only the node header.
268                          */
269                         unsigned char *buf;
270
271                         /* 'buf' will point to the start of data */
272                         buf = (unsigned char *)rd + sizeof(*rd);
273                         /* len will be the read data length */
274                         len = min_t(uint32_t, rdlen - sizeof(*rd), csize);
275                         tn->partial_crc = crc32(0, buf, len);
276
277                         dbg_readinode("Calculates CRC (%#08x) for %d bytes, csize %d\n", tn->partial_crc, len, csize);
278
279                         /* If we actually calculated the whole data CRC
280                          * and it is wrong, drop the node. */
281                         if (len >= csize && unlikely(tn->partial_crc != je32_to_cpu(rd->data_crc))) {
282                                 JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n",
283                                         ref_offset(ref), tn->partial_crc, je32_to_cpu(rd->data_crc));
284                                 goto free_out;
285                         }
286
287                 } else if (csize == 0) {
288                         /*
289                          * We checked the header CRC. If the node has no data, adjust
290                          * the space accounting now. For other nodes this will be done
291                          * later either when the node is marked obsolete or when its
292                          * data is checked.
293                          */
294                         struct jffs2_eraseblock *jeb;
295
296                         dbg_readinode("the node has no data.\n");
297                         jeb = &c->blocks[ref->flash_offset / c->sector_size];
298                         len = ref_totlen(c, jeb, ref);
299
300                         spin_lock(&c->erase_completion_lock);
301                         jeb->used_size += len;
302                         jeb->unchecked_size -= len;
303                         c->used_size += len;
304                         c->unchecked_size -= len;
305                         ref->flash_offset = ref_offset(ref) | REF_NORMAL;
306                         spin_unlock(&c->erase_completion_lock);
307                 }
308         }
309
310         tn->fn = jffs2_alloc_full_dnode();
311         if (!tn->fn) {
312                 JFFS2_ERROR("alloc fn failed\n");
313                 ret = -ENOMEM;
314                 goto free_out;
315         }
316
317         tn->version = je32_to_cpu(rd->version);
318         tn->fn->ofs = je32_to_cpu(rd->offset);
319         tn->data_crc = je32_to_cpu(rd->data_crc);
320         tn->csize = csize;
321         tn->fn->raw = ref;
322
323         /* There was a bug where we wrote hole nodes out with
324            csize/dsize swapped. Deal with it */
325         if (rd->compr == JFFS2_COMPR_ZERO && !je32_to_cpu(rd->dsize) && csize)
326                 tn->fn->size = csize;
327         else // normal case...
328                 tn->fn->size = je32_to_cpu(rd->dsize);
329
330         dbg_readinode("dnode @%08x: ver %u, offset %#04x, dsize %#04x, csize %#04x\n",
331                   ref_offset(ref), je32_to_cpu(rd->version), je32_to_cpu(rd->offset), je32_to_cpu(rd->dsize), csize);
332
333         jffs2_add_tn_to_tree(tn, tnp);
334
335         return 0;
336
337 free_out:
338         jffs2_free_tmp_dnode_info(tn);
339         return ret;
340 }
341
342 /*
343  * Helper function for jffs2_get_inode_nodes().
344  * It is called every time an unknown node is found.
345  *
346  * Returns: 0 on succes;
347  *          1 if the node should be marked obsolete;
348  *          negative error code on failure.
349  */
350 static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, struct jffs2_unknown_node *un)
351 {
352         /* We don't mark unknown nodes as REF_UNCHECKED */
353         BUG_ON(ref_flags(ref) == REF_UNCHECKED);
354
355         un->nodetype = cpu_to_je16(JFFS2_NODE_ACCURATE | je16_to_cpu(un->nodetype));
356
357         if (crc32(0, un, sizeof(struct jffs2_unknown_node) - 4) != je32_to_cpu(un->hdr_crc)) {
358                 /* Hmmm. This should have been caught at scan time. */
359                 JFFS2_NOTICE("node header CRC failed at %#08x. But it must have been OK earlier.\n", ref_offset(ref));
360                 jffs2_dbg_dump_node(c, ref_offset(ref));
361                 return 1;
362         } else {
363                 switch(je16_to_cpu(un->nodetype) & JFFS2_COMPAT_MASK) {
364
365                 case JFFS2_FEATURE_INCOMPAT:
366                         JFFS2_ERROR("unknown INCOMPAT nodetype %#04X at %#08x\n",
367                                 je16_to_cpu(un->nodetype), ref_offset(ref));
368                         /* EEP */
369                         BUG();
370                         break;
371
372                 case JFFS2_FEATURE_ROCOMPAT:
373                         JFFS2_ERROR("unknown ROCOMPAT nodetype %#04X at %#08x\n",
374                                         je16_to_cpu(un->nodetype), ref_offset(ref));
375                         BUG_ON(!(c->flags & JFFS2_SB_FLAG_RO));
376                         break;
377
378                 case JFFS2_FEATURE_RWCOMPAT_COPY:
379                         JFFS2_NOTICE("unknown RWCOMPAT_COPY nodetype %#04X at %#08x\n",
380                                         je16_to_cpu(un->nodetype), ref_offset(ref));
381                         break;
382
383                 case JFFS2_FEATURE_RWCOMPAT_DELETE:
384                         JFFS2_NOTICE("unknown RWCOMPAT_DELETE nodetype %#04X at %#08x\n",
385                                         je16_to_cpu(un->nodetype), ref_offset(ref));
386                         return 1;
387                 }
388         }
389
390         return 0;
391 }
392
393 /*
394  * Helper function for jffs2_get_inode_nodes().
395  * The function detects whether more data should be read and reads it if yes.
396  *
397  * Returns: 0 on succes;
398  *          negative error code on failure.
399  */
400 static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
401                      int right_size, int *rdlen, unsigned char *buf, unsigned char *bufstart)
402 {
403         int right_len, err, len;
404         size_t retlen;
405         uint32_t offs;
406
407         if (jffs2_is_writebuffered(c)) {
408                 right_len = c->wbuf_pagesize - (bufstart - buf);
409                 if (right_size + (int)(bufstart - buf) > c->wbuf_pagesize)
410                         right_len += c->wbuf_pagesize;
411         } else
412                 right_len = right_size;
413
414         if (*rdlen == right_len)
415                 return 0;
416
417         /* We need to read more data */
418         offs = ref_offset(ref) + *rdlen;
419         if (jffs2_is_writebuffered(c)) {
420                 bufstart = buf + c->wbuf_pagesize;
421                 len = c->wbuf_pagesize;
422         } else {
423                 bufstart = buf + *rdlen;
424                 len = right_size - *rdlen;
425         }
426
427         dbg_readinode("read more %d bytes\n", len);
428
429         err = jffs2_flash_read(c, offs, len, &retlen, bufstart);
430         if (err) {
431                 JFFS2_ERROR("can not read %d bytes from 0x%08x, "
432                         "error code: %d.\n", len, offs, err);
433                 return err;
434         }
435
436         if (retlen < len) {
437                 JFFS2_ERROR("short read at %#08x: %d instead of %d.\n",
438                                 offs, retlen, len);
439                 return -EIO;
440         }
441
442         *rdlen = right_len;
443
444         return 0;
445 }
446
447 /* Get tmp_dnode_info and full_dirent for all non-obsolete nodes associated
448    with this ino, returning the former in order of version */
449 static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
450                                  struct rb_root *tnp, struct jffs2_full_dirent **fdp,
451                                  uint32_t *highest_version, uint32_t *latest_mctime,
452                                  uint32_t *mctime_ver)
453 {
454         struct jffs2_raw_node_ref *ref, *valid_ref;
455         struct rb_root ret_tn = RB_ROOT;
456         struct jffs2_full_dirent *ret_fd = NULL;
457         unsigned char *buf = NULL;
458         union jffs2_node_union *node;
459         size_t retlen;
460         int len, err;
461
462         *mctime_ver = 0;
463
464         dbg_readinode("ino #%u\n", f->inocache->ino);
465
466         if (jffs2_is_writebuffered(c)) {
467                 /*
468                  * If we have the write buffer, we assume the minimal I/O unit
469                  * is c->wbuf_pagesize. We implement some optimizations which in
470                  * this case and we need a temporary buffer of size =
471                  * 2*c->wbuf_pagesize bytes (see comments in read_dnode()).
472                  * Basically, we want to read not only the node header, but the
473                  * whole wbuf (NAND page in case of NAND) or 2, if the node
474                  * header overlaps the border between the 2 wbufs.
475                  */
476                 len = 2*c->wbuf_pagesize;
477         } else {
478                 /*
479                  * When there is no write buffer, the size of the temporary
480                  * buffer is the size of the larges node header.
481                  */
482                 len = sizeof(union jffs2_node_union);
483         }
484
485         /* FIXME: in case of NOR and available ->point() this
486          * needs to be fixed. */
487         buf = kmalloc(len, GFP_KERNEL);
488         if (!buf)
489                 return -ENOMEM;
490
491         spin_lock(&c->erase_completion_lock);
492         valid_ref = jffs2_first_valid_node(f->inocache->nodes);
493         if (!valid_ref && f->inocache->ino != 1)
494                 JFFS2_WARNING("Eep. No valid nodes for ino #%u.\n", f->inocache->ino);
495         while (valid_ref) {
496                 unsigned char *bufstart;
497
498                 /* We can hold a pointer to a non-obsolete node without the spinlock,
499                    but _obsolete_ nodes may disappear at any time, if the block
500                    they're in gets erased. So if we mark 'ref' obsolete while we're
501                    not holding the lock, it can go away immediately. For that reason,
502                    we find the next valid node first, before processing 'ref'.
503                 */
504                 ref = valid_ref;
505                 valid_ref = jffs2_first_valid_node(ref->next_in_ino);
506                 spin_unlock(&c->erase_completion_lock);
507
508                 cond_resched();
509
510                 /*
511                  * At this point we don't know the type of the node we're going
512                  * to read, so we do not know the size of its header. In order
513                  * to minimize the amount of flash IO we assume the node has
514                  * size = JFFS2_MIN_NODE_HEADER.
515                  */
516                 if (jffs2_is_writebuffered(c)) {
517                         /*
518                          * We treat 'buf' as 2 adjacent wbufs. We want to
519                          * adjust bufstart such as it points to the
520                          * beginning of the node within this wbuf.
521                          */
522                         bufstart = buf + (ref_offset(ref) % c->wbuf_pagesize);
523                         /* We will read either one wbuf or 2 wbufs. */
524                         len = c->wbuf_pagesize - (bufstart - buf);
525                         if (JFFS2_MIN_NODE_HEADER + (int)(bufstart - buf) > c->wbuf_pagesize) {
526                                 /* The header spans the border of the first wbuf */
527                                 len += c->wbuf_pagesize;
528                         }
529                 } else {
530                         bufstart = buf;
531                         len = JFFS2_MIN_NODE_HEADER;
532                 }
533
534                 dbg_readinode("read %d bytes at %#08x(%d).\n", len, ref_offset(ref), ref_flags(ref));
535
536                 /* FIXME: point() */
537                 err = jffs2_flash_read(c, ref_offset(ref), len,
538                                        &retlen, bufstart);
539                 if (err) {
540                         JFFS2_ERROR("can not read %d bytes from 0x%08x, " "error code: %d.\n", len, ref_offset(ref), err);
541                         goto free_out;
542                 }
543
544                 if (retlen < len) {
545                         JFFS2_ERROR("short read at %#08x: %d instead of %d.\n", ref_offset(ref), retlen, len);
546                         err = -EIO;
547                         goto free_out;
548                 }
549
550                 node = (union jffs2_node_union *)bufstart;
551
552                 switch (je16_to_cpu(node->u.nodetype)) {
553
554                 case JFFS2_NODETYPE_DIRENT:
555
556                         if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_dirent)) {
557                                 err = read_more(c, ref, sizeof(struct jffs2_raw_dirent), &len, buf, bufstart);
558                                 if (unlikely(err))
559                                         goto free_out;
560                         }
561
562                         err = read_direntry(c, ref, &node->d, retlen, &ret_fd, latest_mctime, mctime_ver);
563                         if (err == 1) {
564                                 jffs2_mark_node_obsolete(c, ref);
565                                 break;
566                         } else if (unlikely(err))
567                                 goto free_out;
568
569                         if (je32_to_cpu(node->d.version) > *highest_version)
570                                 *highest_version = je32_to_cpu(node->d.version);
571
572                         break;
573
574                 case JFFS2_NODETYPE_INODE:
575
576                         if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_inode)) {
577                                 err = read_more(c, ref, sizeof(struct jffs2_raw_inode), &len, buf, bufstart);
578                                 if (unlikely(err))
579                                         goto free_out;
580                         }
581
582                         err = read_dnode(c, ref, &node->i, &ret_tn, len, latest_mctime, mctime_ver);
583                         if (err == 1) {
584                                 jffs2_mark_node_obsolete(c, ref);
585                                 break;
586                         } else if (unlikely(err))
587                                 goto free_out;
588
589                         if (je32_to_cpu(node->i.version) > *highest_version)
590                                 *highest_version = je32_to_cpu(node->i.version);
591
592                         break;
593
594                 default:
595                         if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_unknown_node)) {
596                                 err = read_more(c, ref, sizeof(struct jffs2_unknown_node), &len, buf, bufstart);
597                                 if (unlikely(err))
598                                         goto free_out;
599                         }
600
601                         err = read_unknown(c, ref, &node->u);
602                         if (err == 1) {
603                                 jffs2_mark_node_obsolete(c, ref);
604                                 break;
605                         } else if (unlikely(err))
606                                 goto free_out;
607
608                 }
609                 spin_lock(&c->erase_completion_lock);
610         }
611
612         spin_unlock(&c->erase_completion_lock);
613         *tnp = ret_tn;
614         *fdp = ret_fd;
615         kfree(buf);
616
617         dbg_readinode("nodes of inode #%u were read, the highest version is %u, latest_mctime %u, mctime_ver %u.\n",
618                         f->inocache->ino, *highest_version, *latest_mctime, *mctime_ver);
619         return 0;
620
621  free_out:
622         jffs2_free_tmp_dnode_info_list(&ret_tn);
623         jffs2_free_full_dirent_list(ret_fd);
624         kfree(buf);
625         return err;
626 }
627
628 static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
629                                         struct jffs2_inode_info *f,
630                                         struct jffs2_raw_inode *latest_node)
631 {
632         struct jffs2_tmp_dnode_info *tn;
633         struct rb_root tn_list;
634         struct rb_node *rb, *repl_rb;
635         struct jffs2_full_dirent *fd_list;
636         struct jffs2_full_dnode *fn, *first_fn = NULL;
637         uint32_t crc;
638         uint32_t latest_mctime, mctime_ver;
639         size_t retlen;
640         int ret;
641
642         dbg_readinode("ino #%u nlink is %d\n", f->inocache->ino, f->inocache->nlink);
643
644         /* Grab all nodes relevant to this ino */
645         ret = jffs2_get_inode_nodes(c, f, &tn_list, &fd_list, &f->highest_version, &latest_mctime, &mctime_ver);
646
647         if (ret) {
648                 JFFS2_ERROR("cannot read nodes for ino %u, returned error is %d\n", f->inocache->ino, ret);
649                 if (f->inocache->state == INO_STATE_READING)
650                         jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
651                 return ret;
652         }
653         f->dents = fd_list;
654
655         rb = rb_first(&tn_list);
656
657         while (rb) {
658                 cond_resched();
659                 tn = rb_entry(rb, struct jffs2_tmp_dnode_info, rb);
660                 fn = tn->fn;
661                 ret = 1;
662                 dbg_readinode("consider node ver %u, phys offset "
663                         "%#08x(%d), range %u-%u.\n", tn->version,
664                         ref_offset(fn->raw), ref_flags(fn->raw),
665                         fn->ofs, fn->ofs + fn->size);
666
667                 if (fn->size) {
668                         ret = jffs2_add_older_frag_to_fragtree(c, f, tn);
669                         /* TODO: the error code isn't checked, check it */
670                         jffs2_dbg_fragtree_paranoia_check_nolock(f);
671                         BUG_ON(ret < 0);
672                         if (!first_fn && ret == 0)
673                                 first_fn = fn;
674                 } else if (!first_fn) {
675                         first_fn = fn;
676                         f->metadata = fn;
677                         ret = 0; /* Prevent freeing the metadata update node */
678                 } else
679                         jffs2_mark_node_obsolete(c, fn->raw);
680
681                 BUG_ON(rb->rb_left);
682                 if (rb->rb_parent && rb->rb_parent->rb_left == rb) {
683                         /* We were then left-hand child of our parent. We need
684                          * to move our own right-hand child into our place. */
685                         repl_rb = rb->rb_right;
686                         if (repl_rb)
687                                 repl_rb->rb_parent = rb->rb_parent;
688                 } else
689                         repl_rb = NULL;
690
691                 rb = rb_next(rb);
692
693                 /* Remove the spent tn from the tree; don't bother rebalancing
694                  * but put our right-hand child in our own place. */
695                 if (tn->rb.rb_parent) {
696                         if (tn->rb.rb_parent->rb_left == &tn->rb)
697                                 tn->rb.rb_parent->rb_left = repl_rb;
698                         else if (tn->rb.rb_parent->rb_right == &tn->rb)
699                                 tn->rb.rb_parent->rb_right = repl_rb;
700                         else BUG();
701                 } else if (tn->rb.rb_right)
702                         tn->rb.rb_right->rb_parent = NULL;
703
704                 jffs2_free_tmp_dnode_info(tn);
705                 if (ret) {
706                         dbg_readinode("delete dnode %u-%u.\n",
707                                 fn->ofs, fn->ofs + fn->size);
708                         jffs2_free_full_dnode(fn);
709                 }
710         }
711         jffs2_dbg_fragtree_paranoia_check_nolock(f);
712
713         BUG_ON(first_fn && ref_obsolete(first_fn->raw));
714
715         fn = first_fn;
716         if (unlikely(!first_fn)) {
717                 /* No data nodes for this inode. */
718                 if (f->inocache->ino != 1) {
719                         JFFS2_WARNING("no data nodes found for ino #%u\n", f->inocache->ino);
720                         if (!fd_list) {
721                                 if (f->inocache->state == INO_STATE_READING)
722                                         jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
723                                 return -EIO;
724                         }
725                         JFFS2_NOTICE("but it has children so we fake some modes for it\n");
726                 }
727                 latest_node->mode = cpu_to_jemode(S_IFDIR|S_IRUGO|S_IWUSR|S_IXUGO);
728                 latest_node->version = cpu_to_je32(0);
729                 latest_node->atime = latest_node->ctime = latest_node->mtime = cpu_to_je32(0);
730                 latest_node->isize = cpu_to_je32(0);
731                 latest_node->gid = cpu_to_je16(0);
732                 latest_node->uid = cpu_to_je16(0);
733                 if (f->inocache->state == INO_STATE_READING)
734                         jffs2_set_inocache_state(c, f->inocache, INO_STATE_PRESENT);
735                 return 0;
736         }
737
738         ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(*latest_node), &retlen, (void *)latest_node);
739         if (ret || retlen != sizeof(*latest_node)) {
740                 JFFS2_ERROR("failed to read from flash: error %d, %zd of %zd bytes read\n",
741                         ret, retlen, sizeof(*latest_node));
742                 /* FIXME: If this fails, there seems to be a memory leak. Find it. */
743                 up(&f->sem);
744                 jffs2_do_clear_inode(c, f);
745                 return ret?ret:-EIO;
746         }
747
748         crc = crc32(0, latest_node, sizeof(*latest_node)-8);
749         if (crc != je32_to_cpu(latest_node->node_crc)) {
750                 JFFS2_ERROR("CRC failed for read_inode of inode %u at physical location 0x%x\n",
751                         f->inocache->ino, ref_offset(fn->raw));
752                 up(&f->sem);
753                 jffs2_do_clear_inode(c, f);
754                 return -EIO;
755         }
756
757         switch(jemode_to_cpu(latest_node->mode) & S_IFMT) {
758         case S_IFDIR:
759                 if (mctime_ver > je32_to_cpu(latest_node->version)) {
760                         /* The times in the latest_node are actually older than
761                            mctime in the latest dirent. Cheat. */
762                         latest_node->ctime = latest_node->mtime = cpu_to_je32(latest_mctime);
763                 }
764                 break;
765
766
767         case S_IFREG:
768                 /* If it was a regular file, truncate it to the latest node's isize */
769                 jffs2_truncate_fragtree(c, &f->fragtree, je32_to_cpu(latest_node->isize));
770                 break;
771
772         case S_IFLNK:
773                 /* Hack to work around broken isize in old symlink code.
774                    Remove this when dwmw2 comes to his senses and stops
775                    symlinks from being an entirely gratuitous special
776                    case. */
777                 if (!je32_to_cpu(latest_node->isize))
778                         latest_node->isize = latest_node->dsize;
779
780                 if (f->inocache->state != INO_STATE_CHECKING) {
781                         /* Symlink's inode data is the target path. Read it and
782                          * keep in RAM to facilitate quick follow symlink
783                          * operation. */
784                         f->target = kmalloc(je32_to_cpu(latest_node->csize) + 1, GFP_KERNEL);
785                         if (!f->target) {
786                                 JFFS2_ERROR("can't allocate %d bytes of memory for the symlink target path cache\n", je32_to_cpu(latest_node->csize));
787                                 up(&f->sem);
788                                 jffs2_do_clear_inode(c, f);
789                                 return -ENOMEM;
790                         }
791
792                         ret = jffs2_flash_read(c, ref_offset(fn->raw) + sizeof(*latest_node),
793                                                 je32_to_cpu(latest_node->csize), &retlen, (char *)f->target);
794
795                         if (ret  || retlen != je32_to_cpu(latest_node->csize)) {
796                                 if (retlen != je32_to_cpu(latest_node->csize))
797                                         ret = -EIO;
798                                 kfree(f->target);
799                                 f->target = NULL;
800                                 up(&f->sem);
801                                 jffs2_do_clear_inode(c, f);
802                                 return -ret;
803                         }
804
805                         f->target[je32_to_cpu(latest_node->csize)] = '\0';
806                         dbg_readinode("symlink's target '%s' cached\n", f->target);
807                 }
808
809                 /* fall through... */
810
811         case S_IFBLK:
812         case S_IFCHR:
813                 /* Certain inode types should have only one data node, and it's
814                    kept as the metadata node */
815                 if (f->metadata) {
816                         JFFS2_ERROR("Argh. Special inode #%u with mode 0%o had metadata node\n",
817                                f->inocache->ino, jemode_to_cpu(latest_node->mode));
818                         up(&f->sem);
819                         jffs2_do_clear_inode(c, f);
820                         return -EIO;
821                 }
822                 if (!frag_first(&f->fragtree)) {
823                         JFFS2_ERROR("Argh. Special inode #%u with mode 0%o has no fragments\n",
824                                f->inocache->ino, jemode_to_cpu(latest_node->mode));
825                         up(&f->sem);
826                         jffs2_do_clear_inode(c, f);
827                         return -EIO;
828                 }
829                 /* ASSERT: f->fraglist != NULL */
830                 if (frag_next(frag_first(&f->fragtree))) {
831                         JFFS2_ERROR("Argh. Special inode #%u with mode 0x%x had more than one node\n",
832                                f->inocache->ino, jemode_to_cpu(latest_node->mode));
833                         /* FIXME: Deal with it - check crc32, check for duplicate node, check times and discard the older one */
834                         up(&f->sem);
835                         jffs2_do_clear_inode(c, f);
836                         return -EIO;
837                 }
838                 /* OK. We're happy */
839                 f->metadata = frag_first(&f->fragtree)->node;
840                 jffs2_free_node_frag(frag_first(&f->fragtree));
841                 f->fragtree = RB_ROOT;
842                 break;
843         }
844         if (f->inocache->state == INO_STATE_READING)
845                 jffs2_set_inocache_state(c, f->inocache, INO_STATE_PRESENT);
846
847         return 0;
848 }
849
850 /* Scan the list of all nodes present for this ino, build map of versions, etc. */
851 int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
852                         uint32_t ino, struct jffs2_raw_inode *latest_node)
853 {
854         dbg_readinode("read inode #%u\n", ino);
855
856  retry_inocache:
857         spin_lock(&c->inocache_lock);
858         f->inocache = jffs2_get_ino_cache(c, ino);
859
860         if (f->inocache) {
861                 /* Check its state. We may need to wait before we can use it */
862                 switch(f->inocache->state) {
863                 case INO_STATE_UNCHECKED:
864                 case INO_STATE_CHECKEDABSENT:
865                         f->inocache->state = INO_STATE_READING;
866                         break;
867
868                 case INO_STATE_CHECKING:
869                 case INO_STATE_GC:
870                         /* If it's in either of these states, we need
871                            to wait for whoever's got it to finish and
872                            put it back. */
873                         dbg_readinode("waiting for ino #%u in state %d\n", ino, f->inocache->state);
874                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
875                         goto retry_inocache;
876
877                 case INO_STATE_READING:
878                 case INO_STATE_PRESENT:
879                         /* Eep. This should never happen. It can
880                         happen if Linux calls read_inode() again
881                         before clear_inode() has finished though. */
882                         JFFS2_ERROR("Eep. Trying to read_inode #%u when it's already in state %d!\n", ino, f->inocache->state);
883                         /* Fail. That's probably better than allowing it to succeed */
884                         f->inocache = NULL;
885                         break;
886
887                 default:
888                         BUG();
889                 }
890         }
891         spin_unlock(&c->inocache_lock);
892
893         if (!f->inocache && ino == 1) {
894                 /* Special case - no root inode on medium */
895                 f->inocache = jffs2_alloc_inode_cache();
896                 if (!f->inocache) {
897                         JFFS2_ERROR("cannot allocate inocache for root inode\n");
898                         return -ENOMEM;
899                 }
900                 dbg_readinode("creating inocache for root inode\n");
901                 memset(f->inocache, 0, sizeof(struct jffs2_inode_cache));
902                 f->inocache->ino = f->inocache->nlink = 1;
903                 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
904                 f->inocache->state = INO_STATE_READING;
905                 jffs2_add_ino_cache(c, f->inocache);
906         }
907         if (!f->inocache) {
908                 JFFS2_ERROR("requestied to read an nonexistent ino %u\n", ino);
909                 return -ENOENT;
910         }
911
912         return jffs2_do_read_inode_internal(c, f, latest_node);
913 }
914
915 int jffs2_do_crccheck_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
916 {
917         struct jffs2_raw_inode n;
918         struct jffs2_inode_info *f = kmalloc(sizeof(*f), GFP_KERNEL);
919         int ret;
920
921         if (!f)
922                 return -ENOMEM;
923
924         memset(f, 0, sizeof(*f));
925         init_MUTEX_LOCKED(&f->sem);
926         f->inocache = ic;
927
928         ret = jffs2_do_read_inode_internal(c, f, &n);
929         if (!ret) {
930                 up(&f->sem);
931                 jffs2_do_clear_inode(c, f);
932         }
933         kfree (f);
934         return ret;
935 }
936
937 void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
938 {
939         struct jffs2_full_dirent *fd, *fds;
940         int deleted;
941
942         down(&f->sem);
943         deleted = f->inocache && !f->inocache->nlink;
944
945         if (f->inocache && f->inocache->state != INO_STATE_CHECKING)
946                 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING);
947
948         if (f->metadata) {
949                 if (deleted)
950                         jffs2_mark_node_obsolete(c, f->metadata->raw);
951                 jffs2_free_full_dnode(f->metadata);
952         }
953
954         jffs2_kill_fragtree(&f->fragtree, deleted?c:NULL);
955
956         if (f->target) {
957                 kfree(f->target);
958                 f->target = NULL;
959         }
960
961         fds = f->dents;
962         while(fds) {
963                 fd = fds;
964                 fds = fd->next;
965                 jffs2_free_full_dirent(fd);
966         }
967
968         if (f->inocache && f->inocache->state != INO_STATE_CHECKING) {
969                 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
970                 if (f->inocache->nodes == (void *)f->inocache)
971                         jffs2_del_ino_cache(c, f->inocache);
972         }
973
974         up(&f->sem);
975 }