2 * Copyright (c) International Business Machines Corp., 2006
3 * Copyright (c) Nokia Corporation, 2006, 2007
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 * Author: Artem Bityutskiy (Битюцкий Артём)
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/list.h>
28 #include <linux/rbtree.h>
29 #include <linux/sched.h>
30 #include <linux/wait.h>
31 #include <linux/mutex.h>
32 #include <linux/rwsem.h>
33 #include <linux/spinlock.h>
35 #include <linux/cdev.h>
36 #include <linux/device.h>
37 #include <linux/string.h>
38 #include <linux/vmalloc.h>
39 #include <linux/mtd/mtd.h>
40 #include <linux/mtd/ubi.h>
41 #include <linux/notifier.h>
43 #include "ubi-media.h"
47 /* Maximum number of supported UBI devices */
48 #define UBI_MAX_DEVICES 32
50 /* UBI name used for character devices, sysfs, etc */
51 #define UBI_NAME_STR "ubi"
53 /* Normal UBI messages: printed at KERN_NOTICE with a "UBI: " prefix. @fmt is pasted between string literals, so it must itself be a string literal; "\n" is appended */
54 #define ubi_msg(fmt, ...) printk(KERN_NOTICE "UBI: " fmt "\n", ##__VA_ARGS__)
55 /* UBI warning messages: printed at KERN_WARNING, prefixed with "UBI warning: " and the calling function's name (__func__); "\n" is appended */
56 #define ubi_warn(fmt, ...) printk(KERN_WARNING "UBI warning: %s: " fmt "\n", \
57 __func__, ##__VA_ARGS__)
58 /* UBI error messages: printed at KERN_ERR, prefixed with "UBI error: " and the calling function's name (__func__); "\n" is appended */
59 #define ubi_err(fmt, ...) printk(KERN_ERR "UBI error: %s: " fmt "\n", \
60 __func__, ##__VA_ARGS__)
62 /* Lowest number of PEBs reserved for bad PEB handling */
63 #define MIN_RESEVED_PEBS 2
65 /* Background thread name pattern */
66 #define UBI_BGT_NAME_PATTERN "ubi_bgt%dd"
68 /* This marker in the EBA table means that the LEB is un-mapped */
69 #define UBI_LEB_UNMAPPED -1
72 * In case of errors, UBI tries to repeat the operation several times before
73 * returning error. The below constant defines how many times UBI re-tries.
75 #define UBI_IO_RETRIES 3
78 * Length of the protection queue. The length is effectively equivalent to the
79 * number of (global) erase cycles PEBs are protected from the wear-leveling
82 #define UBI_PROT_QUEUE_LEN 10
85 * Error codes returned by the I/O sub-system.
87 * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only
89 * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a
90 * valid erase counter header, and the rest are %0xFF bytes
91 * UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC)
92 * UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or
94 * UBI_IO_BITFLIPS: bit-flips were detected and corrected
105 * Return codes of the 'ubi_eba_copy_leb()' function.
107 * MOVE_CANCEL_RACE: canceled because the volume is being deleted, the source
108 * PEB was put meanwhile, or there is I/O on the source PEB
109 * MOVE_SOURCE_RD_ERR: canceled because there was a read error from the source
111 * MOVE_TARGET_RD_ERR: canceled because there was a read error from the target
113 * MOVE_TARGET_WR_ERR: canceled because there was a write error to the target
115 * MOVE_CANCEL_BITFLIPS: canceled because a bit-flip was detected in the
119 MOVE_CANCEL_RACE = 1,
123 MOVE_CANCEL_BITFLIPS,
127 * struct ubi_wl_entry - wear-leveling entry.
128 * @u.rb: link in the corresponding (free/used) RB-tree
129 * @u.list: link in the protection queue
131 * @pnum: physical eraseblock number
133 * This data structure is used in the WL sub-system. Each physical eraseblock
134 * has a corresponding &struct ubi_wl_entry object which may be kept in different
135 * RB-trees. See WL sub-system for details.
137 struct ubi_wl_entry {
140 struct list_head list;
147 * struct ubi_ltree_entry - an entry in the lock tree.
148 * @rb: links RB-tree nodes
149 * @vol_id: volume ID of the locked logical eraseblock
150 * @lnum: locked logical eraseblock number
151 * @users: how many tasks are using this logical eraseblock or waiting for it
152 * @mutex: read/write mutex to implement read/write access serialization to
153 * the (@vol_id, @lnum) logical eraseblock
155 * This data structure is used in the EBA sub-system to implement per-LEB
156 * locking. When a logical eraseblock is being locked - corresponding
157 * &struct ubi_ltree_entry object is inserted to the lock tree (@ubi->ltree).
158 * See EBA sub-system for details.
160 struct ubi_ltree_entry {
165 struct rw_semaphore mutex;
169 * struct ubi_rename_entry - volume re-name description data structure.
170 * @new_name_len: new volume name length
171 * @new_name: new volume name
172 * @remove: if not zero, this volume should be removed, not re-named
173 * @desc: descriptor of the volume
174 * @list: links re-name entries into a list
176 * This data structure is utilized in the multiple volume re-name code. Namely,
177 * UBI first creates a list of &struct ubi_rename_entry objects from the
178 * &struct ubi_rnvol_req request object, and then utilizes this list to do all
181 struct ubi_rename_entry {
183 char new_name[UBI_VOL_NAME_MAX + 1];
185 struct ubi_volume_desc *desc;
186 struct list_head list;
189 struct ubi_volume_desc;
192 * struct ubi_volume - UBI volume description data structure.
193 * @dev: device object to make use of the Linux device model
194 * @cdev: character device object to create character device
195 * @ubi: reference to the UBI device description object
197 * @ref_count: volume reference count
198 * @readers: number of users holding this volume in read-only mode
199 * @writers: number of users holding this volume in read-write mode
200 * @exclusive: whether somebody holds this volume in exclusive mode
202 * @reserved_pebs: how many physical eraseblocks are reserved for this volume
203 * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME)
204 * @usable_leb_size: logical eraseblock size without padding
205 * @used_ebs: how many logical eraseblocks in this volume contain data
206 * @last_eb_bytes: how many bytes are stored in the last logical eraseblock
207 * @used_bytes: how many bytes of data this volume contains
208 * @alignment: volume alignment
209 * @data_pad: how many bytes are not used at the end of physical eraseblocks to
210 * satisfy the requested alignment
211 * @name_len: volume name length
214 * @upd_ebs: how many eraseblocks are expected to be updated
215 * @ch_lnum: LEB number which is being changed by the atomic LEB change
217 * @ch_dtype: data persistency type which is being changed by the atomic LEB
219 * @upd_bytes: how many bytes are expected to be received for volume update or
221 * @upd_received: how many bytes were already received for volume update or
223 * @upd_buf: update buffer which is used to collect update data or data for
226 * @eba_tbl: EBA table of this volume (LEB->PEB mapping)
227 * @checked: %1 if this static volume was checked
228 * @corrupted: %1 if the volume is corrupted (static volumes only)
229 * @upd_marker: %1 if the update marker is set for this volume
230 * @updating: %1 if the volume is being updated
231 * @changing_leb: %1 if the atomic LEB change ioctl command is in progress
232 * @direct_writes: %1 if direct writes are enabled for this volume
234 * The @corrupted field indicates that the volume's contents are corrupted.
235 * Since UBI protects only static volumes, this field is not relevant to
236 * dynamic volumes - it is user's responsibility to assure their data
239 * The @upd_marker flag indicates that this volume is either being updated at
240 * the moment or is damaged because of an unclean reboot.
245 struct ubi_device *ubi;
257 long long used_bytes;
261 char name[UBI_VOL_NAME_MAX + 1];
267 long long upd_received;
271 unsigned int checked:1;
272 unsigned int corrupted:1;
273 unsigned int upd_marker:1;
274 unsigned int updating:1;
275 unsigned int changing_leb:1;
276 unsigned int direct_writes:1;
280 * struct ubi_volume_desc - UBI volume descriptor returned when it is opened.
281 * @vol: reference to the corresponding volume description object
282 * @mode: open mode (%UBI_READONLY, %UBI_READWRITE, or %UBI_EXCLUSIVE)
284 struct ubi_volume_desc {
285 struct ubi_volume *vol;
292 * struct ubi_device - UBI device description structure
293 * @dev: UBI device object to use the Linux device model
294 * @cdev: character device object to create character device
295 * @ubi_num: UBI device number
296 * @ubi_name: UBI device name
297 * @vol_count: number of volumes in this UBI device
298 * @volumes: volumes of this UBI device
299 * @volumes_lock: protects @volumes, @rsvd_pebs, @avail_pebs, @beb_rsvd_pebs,
300 * @beb_rsvd_level, @bad_peb_count, @good_peb_count, @vol_count,
301 * @vol->readers, @vol->writers, @vol->exclusive,
302 * @vol->ref_count, @vol->mapping and @vol->eba_tbl.
303 * @ref_count: count of references on the UBI device
305 * @rsvd_pebs: count of reserved physical eraseblocks
306 * @avail_pebs: count of available physical eraseblocks
307 * @beb_rsvd_pebs: how many physical eraseblocks are reserved for bad PEB
309 * @beb_rsvd_level: normal level of PEBs reserved for bad PEB handling
311 * @autoresize_vol_id: ID of the volume which has to be auto-resized at the end
312 * of UBI initialization
313 * @vtbl_slots: how many slots are available in the volume table
314 * @vtbl_size: size of the volume table in bytes
315 * @vtbl: in-RAM volume table copy
316 * @device_mutex: protects on-flash volume table and serializes volume
317 * creation, deletion, update, re-size, re-name and set
320 * @max_ec: current highest erase counter value
321 * @mean_ec: current mean erase counter value
323 * @global_sqnum: global sequence number
324 * @ltree_lock: protects the lock tree and @global_sqnum
325 * @ltree: the lock tree
326 * @alc_mutex: serializes "atomic LEB change" operations
328 * @used: RB-tree of used physical eraseblocks
329 * @erroneous: RB-tree of erroneous used physical eraseblocks
330 * @free: RB-tree of free physical eraseblocks
331 * @scrub: RB-tree of physical eraseblocks which need scrubbing
332 * @pq: protection queue (contains physical eraseblocks which are temporarily
333 * protected from the wear-leveling worker)
334 * @pq_head: protection queue head
335 * @wl_lock: protects the @used, @free, @pq, @pq_head, @lookuptbl, @move_from,
336 * @move_to, @move_to_put, @erase_pending, @wl_scheduled, @works,
337 * @erroneous, and @erroneous_peb_count fields
338 * @move_mutex: serializes eraseblock moves
339 * @work_sem: synchronizes the WL worker with user tasks
340 * @wl_scheduled: non-zero if the wear-leveling was scheduled
341 * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any
342 * physical eraseblock
343 * @move_from: physical eraseblock from where the data is being moved
344 * @move_to: physical eraseblock where the data is being moved to
345 * @move_to_put: if the "to" PEB was put
346 * @works: list of pending works
347 * @works_count: count of pending works
348 * @bgt_thread: background thread description object
349 * @thread_enabled: if the background thread is enabled
350 * @bgt_name: background thread name
352 * @flash_size: underlying MTD device size (in bytes)
353 * @peb_count: count of physical eraseblocks on the MTD device
354 * @peb_size: physical eraseblock size
355 * @bad_peb_count: count of bad physical eraseblocks
356 * @good_peb_count: count of good physical eraseblocks
357 * @erroneous_peb_count: count of erroneous physical eraseblocks in @erroneous
358 * @max_erroneous: maximum allowed amount of erroneous physical eraseblocks
359 * @min_io_size: minimal input/output unit size of the underlying MTD device
360 * @hdrs_min_io_size: minimal I/O unit size used for VID and EC headers
361 * @ro_mode: if the UBI device is in read-only mode
362 * @leb_size: logical eraseblock size
363 * @leb_start: starting offset of logical eraseblocks within physical
365 * @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size
366 * @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size
367 * @vid_hdr_offset: starting offset of the volume identifier header (might be
369 * @vid_hdr_aloffset: starting offset of the VID header aligned to
371 * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset
372 * @bad_allowed: whether the MTD device admits of bad physical eraseblocks or
374 * @mtd: MTD device descriptor
376 * @peb_buf1: a buffer of PEB size used for different purposes
377 * @peb_buf2: another buffer of PEB size used for different purposes
378 * @buf_mutex: protects @peb_buf1 and @peb_buf2
379 * @ckvol_mutex: serializes static volume checking when opening
380 * @dbg_peb_buf: buffer of PEB size used for debugging
381 * @dbg_buf_mutex: protects @dbg_peb_buf
387 char ubi_name[sizeof(UBI_NAME_STR)+5];
389 struct ubi_volume *volumes[UBI_MAX_VOLUMES+UBI_INT_VOL_COUNT];
390 spinlock_t volumes_lock;
398 int autoresize_vol_id;
401 struct ubi_vtbl_record *vtbl;
402 struct mutex device_mutex;
405 /* Note, mean_ec is not updated run-time - should be fixed */
408 /* EBA sub-system's stuff */
409 unsigned long long global_sqnum;
410 spinlock_t ltree_lock;
411 struct rb_root ltree;
412 struct mutex alc_mutex;
414 /* Wear-leveling sub-system's stuff */
416 struct rb_root erroneous;
418 struct rb_root scrub;
419 struct list_head pq[UBI_PROT_QUEUE_LEN];
422 struct mutex move_mutex;
423 struct rw_semaphore work_sem;
425 struct ubi_wl_entry **lookuptbl;
426 struct ubi_wl_entry *move_from;
427 struct ubi_wl_entry *move_to;
429 struct list_head works;
431 struct task_struct *bgt_thread;
433 char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2];
435 /* I/O sub-system's stuff */
436 long long flash_size;
441 int erroneous_peb_count;
444 int hdrs_min_io_size;
451 int vid_hdr_aloffset;
454 struct mtd_info *mtd;
458 struct mutex buf_mutex;
459 struct mutex ckvol_mutex;
460 #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
462 struct mutex dbg_buf_mutex;
466 extern struct kmem_cache *ubi_wl_entry_slab;
467 extern const struct file_operations ubi_ctrl_cdev_operations;
468 extern const struct file_operations ubi_cdev_operations;
469 extern const struct file_operations ubi_vol_cdev_operations;
470 extern struct class *ubi_class;
471 extern struct mutex ubi_devices_mutex;
472 extern struct blocking_notifier_head ubi_notifiers;
475 int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
476 struct ubi_vtbl_record *vtbl_rec);
477 int ubi_vtbl_rename_volumes(struct ubi_device *ubi,
478 struct list_head *rename_list);
479 int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si);
482 int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req);
483 int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl);
484 int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs);
485 int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list);
486 int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol);
487 void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol);
490 int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol,
492 int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol,
493 const void __user *buf, int count);
494 int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol,
495 const struct ubi_leb_change_req *req);
496 int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol,
497 const void __user *buf, int count);
500 int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf,
502 int ubi_check_volume(struct ubi_device *ubi, int vol_id);
503 void ubi_calculate_reserved(struct ubi_device *ubi);
506 int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol,
508 int ubi_eba_read_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
509 void *buf, int offset, int len, int check);
510 int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
511 const void *buf, int offset, int len, int dtype);
512 int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol,
513 int lnum, const void *buf, int len, int dtype,
515 int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol,
516 int lnum, const void *buf, int len, int dtype);
517 int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
518 struct ubi_vid_hdr *vid_hdr);
519 int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si);
522 int ubi_wl_get_peb(struct ubi_device *ubi, int dtype);
523 int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture);
524 int ubi_wl_flush(struct ubi_device *ubi);
525 int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum);
526 int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si);
527 void ubi_wl_close(struct ubi_device *ubi);
528 int ubi_thread(void *u);
531 int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset,
533 int ubi_io_write(struct ubi_device *ubi, const void *buf, int pnum, int offset,
535 int ubi_io_sync_erase(struct ubi_device *ubi, int pnum, int torture);
536 int ubi_io_is_bad(const struct ubi_device *ubi, int pnum);
537 int ubi_io_mark_bad(const struct ubi_device *ubi, int pnum);
538 int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum,
539 struct ubi_ec_hdr *ec_hdr, int verbose);
540 int ubi_io_write_ec_hdr(struct ubi_device *ubi, int pnum,
541 struct ubi_ec_hdr *ec_hdr);
542 int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum,
543 struct ubi_vid_hdr *vid_hdr, int verbose);
544 int ubi_io_write_vid_hdr(struct ubi_device *ubi, int pnum,
545 struct ubi_vid_hdr *vid_hdr);
548 int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset);
549 int ubi_detach_mtd_dev(int ubi_num, int anyway);
550 struct ubi_device *ubi_get_device(int ubi_num);
551 void ubi_put_device(struct ubi_device *ubi);
552 struct ubi_device *ubi_get_by_major(int major);
553 int ubi_major2num(int major);
554 int ubi_volume_notify(struct ubi_device *ubi, struct ubi_volume *vol,
556 int ubi_notify_all(struct ubi_device *ubi, int ntype,
557 struct notifier_block *nb);
558 int ubi_enumerate_volumes(struct notifier_block *nb);
561 void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di);
562 void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol,
563 struct ubi_volume_info *vi);
/**
 * ubi_rb_for_each_entry - walk an RB-tree.
 * @rb: a pointer to type 'struct rb_node' to use as a loop counter
 * @pos: a pointer to RB-tree entry type to use as a loop counter
 * @root: RB-tree's root
 * @member: the name of the 'struct rb_node' within the RB-tree entry
 *
 * The walk starts at rb_first(@root), advances with rb_next() and stops as
 * soon as @rb becomes %NULL. @pos is derived from @rb with container_of()
 * only while @rb is non-NULL; otherwise it is set to %NULL. As a result @pos
 * is %NULL after a completed walk and for an empty tree, instead of being a
 * bogus pointer computed from a %NULL node.
 */
#define ubi_rb_for_each_entry(rb, pos, root, member)                         \
	for (rb = rb_first(root),                                            \
	     pos = (rb ? container_of(rb, typeof(*pos), member) : NULL);     \
	     rb;                                                             \
	     rb = rb_next(rb),                                               \
	     pos = (rb ? container_of(rb, typeof(*pos), member) : NULL))
579 * ubi_zalloc_vid_hdr - allocate a volume identifier header object.
580 * @ubi: UBI device description object
581 * @gfp_flags: GFP flags to allocate with
583 * This function returns a pointer to the newly allocated and zero-filled
584 * volume identifier header object in case of success and %NULL in case of
587 static inline struct ubi_vid_hdr *
588 ubi_zalloc_vid_hdr(const struct ubi_device *ubi, gfp_t gfp_flags)
592 vid_hdr = kzalloc(ubi->vid_hdr_alsize, gfp_flags);
597 * VID headers may be stored at un-aligned flash offsets, so we shift
600 return vid_hdr + ubi->vid_hdr_shift;
604 * ubi_free_vid_hdr - free a volume identifier header object.
605 * @ubi: UBI device description object
606 * @vid_hdr: the object to free
608 static inline void ubi_free_vid_hdr(const struct ubi_device *ubi,
609 struct ubi_vid_hdr *vid_hdr)
616 kfree(p - ubi->vid_hdr_shift);
620 * This function is equivalent to 'ubi_io_read()', but @offset is relative to
621 * the beginning of the logical eraseblock, not to the beginning of the
622 * physical eraseblock.
624 static inline int ubi_io_read_data(const struct ubi_device *ubi, void *buf,
625 int pnum, int offset, int len)
627 ubi_assert(offset >= 0);
628 return ubi_io_read(ubi, buf, pnum, offset + ubi->leb_start, len);
632 * This function is equivalent to 'ubi_io_write()', but @offset is relative to
633 * the beginning of the logical eraseblock, not to the beginning of the
634 * physical eraseblock.
636 static inline int ubi_io_write_data(struct ubi_device *ubi, const void *buf,
637 int pnum, int offset, int len)
639 ubi_assert(offset >= 0);
640 return ubi_io_write(ubi, buf, pnum, offset + ubi->leb_start, len);
644 * ubi_ro_mode - switch to read-only mode.
645 * @ubi: UBI device description object
647 static inline void ubi_ro_mode(struct ubi_device *ubi)
651 ubi_warn("switch to read-only mode");
656 * vol_id2idx - get table index by volume ID.
657 * @ubi: UBI device description object
660 static inline int vol_id2idx(const struct ubi_device *ubi, int vol_id)
662 if (vol_id >= UBI_INTERNAL_VOL_START)
663 return vol_id - UBI_INTERNAL_VOL_START + ubi->vtbl_slots;
669 * idx2vol_id - get volume ID by table index.
670 * @ubi: UBI device description object
673 static inline int idx2vol_id(const struct ubi_device *ubi, int idx)
675 if (idx >= ubi->vtbl_slots)
676 return idx - ubi->vtbl_slots + UBI_INTERNAL_VOL_START;
681 #endif /* !__UBI_UBI_H__ */