]> git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge remote-tracking branch 'security/next'
author Thierry Reding <treding@nvidia.com>
Thu, 24 Oct 2013 12:36:59 +0000 (14:36 +0200)
committer Thierry Reding <treding@nvidia.com>
Thu, 24 Oct 2013 12:36:59 +0000 (14:36 +0200)
84 files changed:
Documentation/assoc_array.txt [new file with mode: 0644]
Documentation/devicetree/bindings/i2c/trivial-devices.txt
Documentation/security/keys.txt
MAINTAINERS
crypto/asymmetric_keys/Kconfig
crypto/asymmetric_keys/asymmetric_type.c
crypto/asymmetric_keys/public_key.c
crypto/asymmetric_keys/public_key.h
crypto/asymmetric_keys/x509_cert_parser.c
crypto/asymmetric_keys/x509_parser.h
crypto/asymmetric_keys/x509_public_key.c
drivers/char/tpm/Kconfig
drivers/char/tpm/Makefile
drivers/char/tpm/tpm-interface.c [moved from drivers/char/tpm/tpm.c with 93% similarity]
drivers/char/tpm/tpm.h
drivers/char/tpm/tpm_atmel.c
drivers/char/tpm/tpm_eventlog.c
drivers/char/tpm/tpm_i2c_atmel.c [new file with mode: 0644]
drivers/char/tpm/tpm_i2c_infineon.c
drivers/char/tpm/tpm_i2c_nuvoton.c [new file with mode: 0644]
drivers/char/tpm/tpm_i2c_stm_st33.c
drivers/char/tpm/tpm_ibmvtpm.c
drivers/char/tpm/tpm_ppi.c
drivers/char/tpm/tpm_tis.c
drivers/char/tpm/xen-tpmfront.c
include/crypto/public_key.h
include/keys/big_key-type.h [new file with mode: 0644]
include/keys/keyring-type.h
include/keys/system_keyring.h [new file with mode: 0644]
include/linux/assoc_array.h [new file with mode: 0644]
include/linux/assoc_array_priv.h [new file with mode: 0644]
include/linux/key-type.h
include/linux/key.h
include/linux/security.h
include/linux/user_namespace.h
include/uapi/linux/keyctl.h
init/Kconfig
kernel/Makefile
kernel/modsign_certificate.S [deleted file]
kernel/modsign_pubkey.c [deleted file]
kernel/module-internal.h
kernel/module_signing.c
kernel/system_certificates.S [new file with mode: 0644]
kernel/system_keyring.c [new file with mode: 0644]
kernel/user.c
kernel/user_namespace.c
lib/Kconfig
lib/Makefile
lib/assoc_array.c [new file with mode: 0644]
lib/mpi/mpiutil.c
scripts/asn1_compiler.c
security/Makefile
security/capability.c
security/keys/Kconfig
security/keys/Makefile
security/keys/big_key.c [new file with mode: 0644]
security/keys/compat.c
security/keys/gc.c
security/keys/internal.h
security/keys/key.c
security/keys/keyctl.c
security/keys/keyring.c
security/keys/persistent.c [new file with mode: 0644]
security/keys/proc.c
security/keys/process_keys.c
security/keys/request_key.c
security/keys/request_key_auth.c
security/keys/sysctl.c
security/keys/user_defined.c
security/security.c
security/selinux/hooks.c
security/selinux/include/objsec.h
security/selinux/include/security.h
security/selinux/include/xfrm.h
security/selinux/netlabel.c
security/selinux/netnode.c
security/selinux/selinuxfs.c
security/selinux/ss/ebitmap.c
security/selinux/ss/ebitmap.h
security/selinux/ss/mls.c
security/selinux/ss/mls_types.h
security/selinux/ss/policydb.c
security/selinux/ss/services.c
security/selinux/xfrm.c

diff --git a/Documentation/assoc_array.txt b/Documentation/assoc_array.txt
new file mode 100644 (file)
index 0000000..f4faec0
--- /dev/null
@@ -0,0 +1,574 @@
+                  ========================================
+                  GENERIC ASSOCIATIVE ARRAY IMPLEMENTATION
+                  ========================================
+
+Contents:
+
+ - Overview.
+
+ - The public API.
+   - Edit script.
+   - Operations table.
+   - Manipulation functions.
+   - Access functions.
+   - Index key form.
+
+ - Internal workings.
+   - Basic internal tree layout.
+   - Shortcuts.
+   - Splitting and collapsing nodes.
+   - Non-recursive iteration.
+   - Simultaneous alteration and iteration.
+
+
+========
+OVERVIEW
+========
+
+This associative array implementation is an object container with the following
+properties:
+
+ (1) Objects are opaque pointers.  The implementation does not care where they
+     point (if anywhere) or what they point to (if anything).
+
+     [!] NOTE: Pointers to objects _must_ be zero in the least significant bit.
+
+ (2) Objects do not need to contain linkage blocks for use by the array.  This
+     permits an object to be located in multiple arrays simultaneously.
+     Rather, the array is made up of metadata blocks that point to objects.
+
+ (3) Objects require index keys to locate them within the array.
+
+ (4) Index keys must be unique.  Inserting an object with the same key as one
+     already in the array will replace the old object.
+
+ (5) Index keys can be of any length and can be of different lengths.
+
+ (6) Index keys should encode the length early on, before any variation due to
+     length is seen.
+
+ (7) Index keys can include a hash to scatter objects throughout the array.
+
+ (8) The array can be iterated over.  The objects will not necessarily come out
+     in key order.
+
+ (9) The array can be iterated over whilst it is being modified, provided the
+     RCU readlock is being held by the iterator.  Note, however, under these
+     circumstances, some objects may be seen more than once.  If this is a
+     problem, the iterator should lock against modification.  Objects will not
+     be missed, however, unless deleted.
+
+(10) Objects in the array can be looked up by means of their index key.
+
+(11) Objects can be looked up whilst the array is being modified, provided the
+     RCU readlock is being held by the thread doing the look up.
+
+The implementation uses a tree of 16-pointer nodes internally that are indexed
+on each level by nibbles from the index key in the same manner as in a radix
+tree.  To improve memory efficiency, shortcuts can be emplaced to skip over
+what would otherwise be a series of single-occupancy nodes.  Further, nodes
+pack leaf object pointers into spare space in the node rather than making an
+extra branch until such time as an object needs to be added to a full node.
+
+
+==============
+THE PUBLIC API
+==============
+
+The public API can be found in <linux/assoc_array.h>.  The associative array is
+rooted on the following structure:
+
+       struct assoc_array {
+               ...
+       };
+
+The code is selected by enabling CONFIG_ASSOCIATIVE_ARRAY.
+
+
+EDIT SCRIPT
+-----------
+
+The insertion and deletion functions produce an 'edit script' that can later be
+applied to effect the changes without risking ENOMEM.  This retains the
+preallocated metadata blocks that will be installed in the internal tree and
+keeps track of the metadata blocks that will be removed from the tree when the
+script is applied.
+
+This is also used to keep track of dead blocks and dead objects after the
+script has been applied so that they can be freed later.  The freeing is done
+after an RCU grace period has passed - thus allowing access functions to
+proceed under the RCU read lock.
+
+The script appears outside of the API as a pointer of the type:
+
+       struct assoc_array_edit;
+
+There are two functions for dealing with the script:
+
+ (1) Apply an edit script.
+
+       void assoc_array_apply_edit(struct assoc_array_edit *edit);
+
+     This will perform the edit functions, interpolating various write barriers
+     to permit accesses under the RCU read lock to continue.  The edit script
+     will then be passed to call_rcu() to free it and any dead stuff it points
+     to.
+
+ (2) Cancel an edit script.
+
+       void assoc_array_cancel_edit(struct assoc_array_edit *edit);
+
+     This frees the edit script and all preallocated memory immediately.  If
+     this was for insertion, the new object is _not_ released by this function,
+     but must rather be released by the caller.
+
+These functions are guaranteed not to fail.
+
+
+OPERATIONS TABLE
+----------------
+
+Various functions take a table of operations:
+
+       struct assoc_array_ops {
+               ...
+       };
+
+This points to a number of methods, all of which need to be provided:
+
+ (1) Get a chunk of index key from caller data:
+
+       unsigned long (*get_key_chunk)(const void *index_key, int level);
+
+     This should return a chunk of caller-supplied index key starting at the
+     *bit* position given by the level argument.  The level argument will be a
+     multiple of ASSOC_ARRAY_KEY_CHUNK_SIZE and the function should return
+     ASSOC_ARRAY_KEY_CHUNK_SIZE bits.  No error is possible.
+
+
+ (2) Get a chunk of an object's index key.
+
+       unsigned long (*get_object_key_chunk)(const void *object, int level);
+
+     As the previous function, but gets its data from an object in the array
+     rather than from a caller-supplied index key.
+
+
+ (3) See if this is the object we're looking for.
+
+       bool (*compare_object)(const void *object, const void *index_key);
+
+     Compare the object against an index key and return true if it matches and
+     false if it doesn't.
+
+
+ (4) Diff the index keys of two objects.
+
+       int (*diff_objects)(const void *a, const void *b);
+
+     Return the bit position at which the index keys of two objects differ or
+     -1 if they are the same.
+
+
+ (5) Free an object.
+
+       void (*free_object)(void *object);
+
+     Free the specified object.  Note that this may be called an RCU grace
+     period after assoc_array_apply_edit() was called, so synchronize_rcu() may
+     be necessary on module unloading.
+
+
+MANIPULATION FUNCTIONS
+----------------------
+
+There are a number of functions for manipulating an associative array:
+
+ (1) Initialise an associative array.
+
+       void assoc_array_init(struct assoc_array *array);
+
+     This initialises the base structure for an associative array.  It can't
+     fail.
+
+
+ (2) Insert/replace an object in an associative array.
+
+       struct assoc_array_edit *
+       assoc_array_insert(struct assoc_array *array,
+                          const struct assoc_array_ops *ops,
+                          const void *index_key,
+                          void *object);
+
+     This inserts the given object into the array.  Note that the least
+     significant bit of the pointer must be zero as it's used to type-mark
+     pointers internally.
+
+     If an object already exists for that key then it will be replaced with the
+     new object and the old one will be freed automatically.
+
+     The index_key argument should hold index key information and is
+     passed to the methods in the ops table when they are called.
+
+     This function makes no alteration to the array itself, but rather returns
+     an edit script that must be applied.  -ENOMEM is returned in the case of
+     an out-of-memory error.
+
+     The caller should lock exclusively against other modifiers of the array.
+
+
+ (3) Delete an object from an associative array.
+
+       struct assoc_array_edit *
+       assoc_array_delete(struct assoc_array *array,
+                          const struct assoc_array_ops *ops,
+                          const void *index_key);
+
+     This deletes an object that matches the specified data from the array.
+
+     The index_key argument should hold index key information and is
+     passed to the methods in the ops table when they are called.
+
+     This function makes no alteration to the array itself, but rather returns
+     an edit script that must be applied.  -ENOMEM is returned in the case of
+     an out-of-memory error.  NULL will be returned if the specified object is
+     not found within the array.
+
+     The caller should lock exclusively against other modifiers of the array.
+
+
+ (4) Delete all objects from an associative array.
+
+       struct assoc_array_edit *
+       assoc_array_clear(struct assoc_array *array,
+                         const struct assoc_array_ops *ops);
+
+     This deletes all the objects from an associative array and leaves it
+     completely empty.
+
+     This function makes no alteration to the array itself, but rather returns
+     an edit script that must be applied.  -ENOMEM is returned in the case of
+     an out-of-memory error.
+
+     The caller should lock exclusively against other modifiers of the array.
+
+
+ (5) Destroy an associative array, deleting all objects.
+
+       void assoc_array_destroy(struct assoc_array *array,
+                                const struct assoc_array_ops *ops);
+
+     This destroys the contents of the associative array and leaves it
+     completely empty.  It is not permitted for another thread to be traversing
+     the array under the RCU read lock at the same time as this function is
+     destroying it as no RCU deferral is performed on memory release -
+     something that would require memory to be allocated.
+
+     The caller should lock exclusively against other modifiers and accessors
+     of the array.
+
+
+ (6) Garbage collect an associative array.
+
+       int assoc_array_gc(struct assoc_array *array,
+                          const struct assoc_array_ops *ops,
+                          bool (*iterator)(void *object, void *iterator_data),
+                          void *iterator_data);
+
+     This iterates over the objects in an associative array and passes each one
+     to iterator().  If iterator() returns true, the object is kept.  If it
+     returns false, the object will be freed.  If the iterator() function
+     returns true, it must perform any appropriate refcount incrementing on the
+     object before returning.
+
+     The internal tree will be packed down if possible as part of the iteration
+     to reduce the number of nodes in it.
+
+     The iterator_data is passed directly to iterator() and is otherwise
+     ignored by the function.
+
+     The function will return 0 if successful and -ENOMEM if there wasn't
+     enough memory.
+
+     It is possible for other threads to iterate over or search the array under
+     the RCU read lock whilst this function is in progress.  The caller should
+     lock exclusively against other modifiers of the array.
+
+
+ACCESS FUNCTIONS
+----------------
+
+There are two functions for accessing an associative array:
+
+ (1) Iterate over all the objects in an associative array.
+
+       int assoc_array_iterate(const struct assoc_array *array,
+                               int (*iterator)(const void *object,
+                                               void *iterator_data),
+                               void *iterator_data);
+
+     This passes each object in the array to the iterator callback function.
+     iterator_data is private data for that function.
+
+     This may be used on an array at the same time as the array is being
+     modified, provided the RCU read lock is held.  Under such circumstances,
+     it is possible for the iteration function to see some objects twice.  If
+     this is a problem, then modification should be locked against.  The
+     iteration algorithm should not, however, miss any objects.
+
+     The function will return 0 if no objects were in the array or else it will
+     return the result of the last iterator function called.  Iteration stops
+     immediately if any call to the iteration function results in a non-zero
+     return.
+
+
+ (2) Find an object in an associative array.
+
+       void *assoc_array_find(const struct assoc_array *array,
+                              const struct assoc_array_ops *ops,
+                              const void *index_key);
+
+     This walks through the array's internal tree directly to the object
+     specified by the index key.
+
+     This may be used on an array at the same time as the array is being
+     modified, provided the RCU read lock is held.
+
+     The function will return the object if found or will return NULL if the
+     object was not found.
+
+
+INDEX KEY FORM
+--------------
+
+The index key can be of any form, but since the algorithms aren't told how long
+the key is, it is strongly recommended that the index key includes its length
+very early on before any variation due to the length would have an effect on
+comparisons.
+
+This will cause leaves with different length keys to scatter away from each
+other - and those with the same length keys to cluster together.
+
+It is also recommended that the index key begin with a hash of the rest of the
+key to maximise scattering throughout keyspace.
+
+The better the scattering, the wider and lower the internal tree will be.
+
+Poor scattering isn't too much of a problem as there are shortcuts and nodes
+can contain mixtures of leaves and metadata pointers.
+
+The index key is read in chunks of machine word.  Each chunk is subdivided into
+one nibble (4 bits) per level, so on a 32-bit CPU this is good for 8 levels and
+on a 64-bit CPU, 16 levels.  Unless the scattering is really poor, it is
+unlikely that more than one word of any particular index key will have to be
+used.
+
+
+=================
+INTERNAL WORKINGS
+=================
+
+The associative array data structure has an internal tree.  This tree is
+constructed of two types of metadata blocks: nodes and shortcuts.
+
+A node is an array of slots.  Each slot can contain one of four things:
+
+ (*) A NULL pointer, indicating that the slot is empty.
+
+ (*) A pointer to an object (a leaf).
+
+ (*) A pointer to a node at the next level.
+
+ (*) A pointer to a shortcut.
+
+
+BASIC INTERNAL TREE LAYOUT
+--------------------------
+
+Ignoring shortcuts for the moment, the nodes form a multilevel tree.  The index
+key space is strictly subdivided by the nodes in the tree and nodes occur on
+fixed levels.  For example:
+
+ Level:        0               1               2               3
+       =============== =============== =============== ===============
+                                                       NODE D
+                       NODE B          NODE C  +------>+---+
+               +------>+---+   +------>+---+   |       | 0 |
+       NODE A  |       | 0 |   |       | 0 |   |       +---+
+       +---+   |       +---+   |       +---+   |       :   :
+       | 0 |   |       :   :   |       :   :   |       +---+
+       +---+   |       +---+   |       +---+   |       | f |
+       | 1 |---+       | 3 |---+       | 7 |---+       +---+
+       +---+           +---+           +---+
+       :   :           :   :           | 8 |---+
+       +---+           +---+           +---+   |       NODE E
+       | e |---+       | f |           :   :   +------>+---+
+       +---+   |       +---+           +---+           | 0 |
+       | f |   |                       | f |           +---+
+       +---+   |                       +---+           :   :
+               |       NODE F                          +---+
+               +------>+---+                           | f |
+                       | 0 |           NODE G          +---+
+                       +---+   +------>+---+
+                       :   :   |       | 0 |
+                       +---+   |       +---+
+                       | 6 |---+       :   :
+                       +---+           +---+
+                       :   :           | f |
+                       +---+           +---+
+                       | f |
+                       +---+
+
+In the above example, there are 7 nodes (A-G), each with 16 slots (0-f).
+Assuming no other metadata nodes in the tree, the key space is divided thusly:
+
+       KEY PREFIX      NODE
+       ==========      ====
+       137*            D
+       138*            E
+       13[0-69-f]*     C
+       1[0-24-f]*      B
+       e6*             G
+       e[0-57-f]*      F
+       [02-df]*        A
+
+So, for instance, keys with the following example index keys will be found in
+the appropriate nodes:
+
+       INDEX KEY       PREFIX  NODE
+       =============== ======= ====
+       13694892892489  13      C
+       13795289025897  137     D
+       13889dde88793   138     E
+       138bbb89003093  138     E
+       1394879524789   13      C
+       1458952489      1       B
+       9431809de993ba  -       A
+       b4542910809cd   -       A
+       e5284310def98   e       F
+       e68428974237    e6      G
+       e7fffcbd443     e       F
+       f3842239082     -       A
+
+To save memory, if a node can hold all the leaves in its portion of keyspace,
+then the node will have all those leaves in it and will not have any metadata
+pointers - even if some of those leaves would like to be in the same slot.
+
+A node can contain a heterogeneous mix of leaves and metadata pointers.
+Metadata pointers must be in the slots that match their subdivisions of key
+space.  The leaves can be in any slot not occupied by a metadata pointer.  It
+is guaranteed that none of the leaves in a node will match a slot occupied by a
+metadata pointer.  If the metadata pointer is there, any leaf whose key matches
+the metadata key prefix must be in the subtree that the metadata pointer points
+to.
+
+In the above example list of index keys, node A will contain:
+
+       SLOT    CONTENT         INDEX KEY (PREFIX)
+       ====    =============== ==================
+       1       PTR TO NODE B   1*
+       any     LEAF            9431809de993ba
+       any     LEAF            b4542910809cd
+       e       PTR TO NODE F   e*
+       any     LEAF            f3842239082
+
+and node B:
+
+       3       PTR TO NODE C   13*
+       any     LEAF            1458952489
+
+
+SHORTCUTS
+---------
+
+Shortcuts are metadata records that jump over a piece of keyspace.  A shortcut
+is a replacement for a series of single-occupancy nodes ascending through the
+levels.  Shortcuts exist to save memory and to speed up traversal.
+
+It is possible for the root of the tree to be a shortcut - say, for example,
+the tree contains at least 17 nodes all with key prefix '1111'.  The insertion
+algorithm will insert a shortcut to skip over the '1111' keyspace in a single
+bound and get to the fourth level where these actually become different.
+
+
+SPLITTING AND COLLAPSING NODES
+------------------------------
+
+Each node has a maximum capacity of 16 leaves and metadata pointers.  If the
+insertion algorithm finds that it is trying to insert a 17th object into a
+node, that node will be split such that at least two leaves that have a common
+key segment at that level end up in a separate node rooted on that slot for
+that common key segment.
+
+If the leaves in a full node and the leaf that is being inserted are
+sufficiently similar, then a shortcut will be inserted into the tree.
+
+When the number of objects in the subtree rooted at a node falls to 16 or
+fewer, then the subtree will be collapsed down to a single node - and this will
+ripple towards the root if possible.
+
+
+NON-RECURSIVE ITERATION
+-----------------------
+
+Each node and shortcut contains a back pointer to its parent and the number of
+the slot in that parent that points to it.  Non-recursive iteration uses these to
+proceed rootwards through the tree, going to the parent node, slot N + 1 to
+make sure progress is made without the need for a stack.
+
+The backpointers, however, make simultaneous alteration and iteration tricky.
+
+
+SIMULTANEOUS ALTERATION AND ITERATION
+-------------------------------------
+
+There are a number of cases to consider:
+
+ (1) Simple insert/replace.  This involves simply replacing a NULL or old
+     matching leaf pointer with the pointer to the new leaf after a barrier.
+     The metadata blocks don't change otherwise.  An old leaf won't be freed
+     until after the RCU grace period.
+
+ (2) Simple delete.  This involves just clearing an old matching leaf.  The
+     metadata blocks don't change otherwise.  The old leaf won't be freed until
+     after the RCU grace period.
+
+ (3) Insertion replacing part of a subtree that we haven't yet entered.  This
+     may involve replacement of part of that subtree - but that won't affect
+     the iteration as we won't have reached the pointer to it yet and the
+     ancestry blocks are not replaced (the layout of those does not change).
+
+ (4) Insertion replacing nodes that we're actively processing.  This isn't a
+     problem as we've passed the anchoring pointer and won't switch onto the
+     new layout until we follow the back pointers - at which point we've
+     already examined the leaves in the replaced node (we iterate over all the
+     leaves in a node before following any of its metadata pointers).
+
+     We might, however, re-see some leaves that have been split out into a new
+     branch that's in a slot further along than we were at.
+
+ (5) Insertion replacing nodes that we're processing a dependent branch of.
+     This won't affect us until we follow the back pointers.  Similar to (4).
+
+ (6) Deletion collapsing a branch under us.  This doesn't affect us because the
+     back pointers will get us back to the parent of the new node before we
+     could see the new node.  The entire collapsed subtree is thrown away
+     unchanged - and will still be rooted on the same slot, so we shouldn't
+     process it a second time as we'll go back to slot + 1.
+
+Note:
+
+ (*) Under some circumstances, we need to simultaneously change the parent
+     pointer and the parent slot pointer on a node (say, for example, we
+     inserted another node before it and moved it up a level).  We cannot do
+     this without locking against a read - so we have to replace that node too.
+
+     However, when we're changing a shortcut into a node this isn't a problem
+     as shortcuts only have one slot and so the parent slot number isn't used
+     when traversing backwards over one.  This means that it's okay to change
+     the slot number first - provided suitable barriers are used to make sure
+     the parent slot number is read after the back pointer.
+
+Obsolete blocks and leaves are freed up after an RCU grace period has passed,
+so as long as anyone doing walking or iteration holds the RCU read lock, the
+old superstructure should not go away on them.
index ad6a73852f0880bf3625893d6ffc11646bdc8048..f1fb26eed0e9486f7226f58c6b14fce571c2f371 100644 (file)
@@ -15,6 +15,7 @@ adi,adt7461           +/-1C TDM Extended Temp Range I.C
 adt7461                        +/-1C TDM Extended Temp Range I.C
 at,24c08               i2c serial eeprom  (24cxx)
 atmel,24c02            i2c serial eeprom  (24cxx)
+atmel,at97sc3204t      i2c trusted platform module (TPM)
 catalyst,24c32         i2c serial eeprom
 dallas,ds1307          64 x 8, Serial, I2C Real-Time Clock
 dallas,ds1338          I2C RTC with 56-Byte NV RAM
@@ -44,6 +45,7 @@ mc,rv3029c2           Real Time Clock Module with I2C-Bus
 national,lm75          I2C TEMP SENSOR
 national,lm80          Serial Interface ACPI-Compatible Microprocessor System Hardware Monitor
 national,lm92          ±0.33°C Accurate, 12-Bit + Sign Temperature Sensor and Thermal Window Comparator with Two-Wire Interface
+nuvoton,npct501                i2c trusted platform module (TPM)
 nxp,pca9556            Octal SMBus and I2C registered interface
 nxp,pca9557            8-bit I2C-bus and SMBus I/O port with reset
 nxp,pcf8563            Real-time clock/calendar
@@ -61,3 +63,4 @@ taos,tsl2550          Ambient Light Sensor with SMBUS/Two Wire Serial Interface
 ti,tsc2003             I2C Touch-Screen Controller
 ti,tmp102              Low Power Digital Temperature Sensor with SMBUS/Two Wire Serial Interface
 ti,tmp275              Digital Temperature Sensor
+winbond,wpct301                i2c trusted platform module (TPM)
index 7b4145d00452f259fe79eff4f400ecfc949a6fd1..a4c33f1a7c6de5dc2207a21bab00846266668f90 100644 (file)
@@ -865,15 +865,14 @@ encountered:
      calling processes has a searchable link to the key from one of its
      keyrings. There are three functions for dealing with these:
 
-       key_ref_t make_key_ref(const struct key *key,
-                              unsigned long possession);
+       key_ref_t make_key_ref(const struct key *key, bool possession);
 
        struct key *key_ref_to_ptr(const key_ref_t key_ref);
 
-       unsigned long is_key_possessed(const key_ref_t key_ref);
+       bool is_key_possessed(const key_ref_t key_ref);
 
      The first function constructs a key reference from a key pointer and
-     possession information (which must be 0 or 1 and not any other value).
+     possession information (which must be true or false).
 
      The second function retrieves the key pointer from a reference and the
      third retrieves the possession flag.
@@ -961,14 +960,17 @@ payload contents" for more information.
     the argument will not be parsed.
 
 
-(*) Extra references can be made to a key by calling the following function:
+(*) Extra references can be made to a key by calling one of the following
+    functions:
 
+       struct key *__key_get(struct key *key);
        struct key *key_get(struct key *key);
 
-    These need to be disposed of by calling key_put() when they've been
-    finished with. The key pointer passed in will be returned. If the pointer
-    is NULL or CONFIG_KEYS is not set then the key will not be dereferenced and
-    no increment will take place.
+    Keys so referenced will need to be disposed of by calling key_put() when
+    they've been finished with.  The key pointer passed in will be returned.
+
+    In the case of key_get(), if the pointer is NULL or CONFIG_KEYS is not set
+    then the key will not be dereferenced and no increment will take place.
 
 
 (*) A key's serial number can be obtained by calling:
index 480d77d216200178df0f3c6a2111d4164be49287..10a90130579c8f603ff9998e82dca556ee847744 100644 (file)
@@ -7458,9 +7458,10 @@ SELINUX SECURITY MODULE
 M:     Stephen Smalley <sds@tycho.nsa.gov>
 M:     James Morris <james.l.morris@oracle.com>
 M:     Eric Paris <eparis@parisplace.org>
+M:     Paul Moore <paul@paul-moore.com>
 L:     selinux@tycho.nsa.gov (subscribers-only, general discussion)
 W:     http://selinuxproject.org
-T:     git git://git.infradead.org/users/eparis/selinux.git
+T:     git git://git.infradead.org/users/pcmoore/selinux
 S:     Supported
 F:     include/linux/selinux*
 F:     security/selinux/
@@ -8549,6 +8550,7 @@ F:        drivers/media/usb/tm6000/
 TPM DEVICE DRIVER
 M:     Leonidas Da Silva Barbosa <leosilva@linux.vnet.ibm.com>
 M:     Ashley Lai <ashley@ashleylai.com>
+M:     Peter Huewe <peterhuewe@gmx.de>
 M:     Rajiv Andrade <mail@srajiv.net>
 W:     http://tpmdd.sourceforge.net
 M:     Marcel Selhorst <tpmdd@selhorst.net>
index 6d2c2ea12559c57624b687a06e0b6f3f3ae4291a..862b01fe61726694f656a9fc16fc9a012906528b 100644 (file)
@@ -12,6 +12,7 @@ if ASYMMETRIC_KEY_TYPE
 config ASYMMETRIC_PUBLIC_KEY_SUBTYPE
        tristate "Asymmetric public-key crypto algorithm subtype"
        select MPILIB
+       select PUBLIC_KEY_ALGO_RSA
        help
          This option provides support for asymmetric public key type handling.
          If signature generation and/or verification are to be used,
@@ -20,7 +21,6 @@ config ASYMMETRIC_PUBLIC_KEY_SUBTYPE
 
 config PUBLIC_KEY_ALGO_RSA
        tristate "RSA public-key algorithm"
-       depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE
        select MPILIB_EXTRA
        help
          This option enables support for the RSA algorithm (PKCS#1, RFC3447).
index cf807654d221c80fced3280ae23bc666a2089348..b77eb53047882ad26de8de4892e8568a4b792326 100644 (file)
@@ -209,6 +209,7 @@ struct key_type key_type_asymmetric = {
        .match          = asymmetric_key_match,
        .destroy        = asymmetric_key_destroy,
        .describe       = asymmetric_key_describe,
+       .def_lookup_type = KEYRING_SEARCH_LOOKUP_ITERATE,
 };
 EXPORT_SYMBOL_GPL(key_type_asymmetric);
 
index cb2e29180a87286321f593649365c2cb2fc95247..49ac8d848ed162eacf0aaacf1d883fe70059ba8c 100644 (file)
 
 MODULE_LICENSE("GPL");
 
-const char *const pkey_algo[PKEY_ALGO__LAST] = {
+const char *const pkey_algo_name[PKEY_ALGO__LAST] = {
        [PKEY_ALGO_DSA]         = "DSA",
        [PKEY_ALGO_RSA]         = "RSA",
 };
+EXPORT_SYMBOL_GPL(pkey_algo_name);
+
+const struct public_key_algorithm *pkey_algo[PKEY_ALGO__LAST] = {
+#if defined(CONFIG_PUBLIC_KEY_ALGO_RSA) || \
+       defined(CONFIG_PUBLIC_KEY_ALGO_RSA_MODULE)
+       [PKEY_ALGO_RSA]         = &RSA_public_key_algorithm,
+#endif
+};
 EXPORT_SYMBOL_GPL(pkey_algo);
 
-const char *const pkey_hash_algo[PKEY_HASH__LAST] = {
+const char *const pkey_hash_algo_name[PKEY_HASH__LAST] = {
        [PKEY_HASH_MD4]         = "md4",
        [PKEY_HASH_MD5]         = "md5",
        [PKEY_HASH_SHA1]        = "sha1",
@@ -38,13 +46,13 @@ const char *const pkey_hash_algo[PKEY_HASH__LAST] = {
        [PKEY_HASH_SHA512]      = "sha512",
        [PKEY_HASH_SHA224]      = "sha224",
 };
-EXPORT_SYMBOL_GPL(pkey_hash_algo);
+EXPORT_SYMBOL_GPL(pkey_hash_algo_name);
 
-const char *const pkey_id_type[PKEY_ID_TYPE__LAST] = {
+const char *const pkey_id_type_name[PKEY_ID_TYPE__LAST] = {
        [PKEY_ID_PGP]           = "PGP",
        [PKEY_ID_X509]          = "X509",
 };
-EXPORT_SYMBOL_GPL(pkey_id_type);
+EXPORT_SYMBOL_GPL(pkey_id_type_name);
 
 /*
  * Provide a part of a description of the key for /proc/keys.
@@ -56,7 +64,7 @@ static void public_key_describe(const struct key *asymmetric_key,
 
        if (key)
                seq_printf(m, "%s.%s",
-                          pkey_id_type[key->id_type], key->algo->name);
+                          pkey_id_type_name[key->id_type], key->algo->name);
 }
 
 /*
@@ -78,21 +86,45 @@ EXPORT_SYMBOL_GPL(public_key_destroy);
 /*
  * Verify a signature using a public key.
  */
-static int public_key_verify_signature(const struct key *key,
-                                      const struct public_key_signature *sig)
+int public_key_verify_signature(const struct public_key *pk,
+                               const struct public_key_signature *sig)
 {
-       const struct public_key *pk = key->payload.data;
+       const struct public_key_algorithm *algo;
+
+       BUG_ON(!pk);
+       BUG_ON(!pk->mpi[0]);
+       BUG_ON(!pk->mpi[1]);
+       BUG_ON(!sig);
+       BUG_ON(!sig->digest);
+       BUG_ON(!sig->mpi[0]);
+
+       algo = pk->algo;
+       if (!algo) {
+               if (pk->pkey_algo >= PKEY_ALGO__LAST)
+                       return -ENOPKG;
+               algo = pkey_algo[pk->pkey_algo];
+               if (!algo)
+                       return -ENOPKG;
+       }
 
-       if (!pk->algo->verify_signature)
+       if (!algo->verify_signature)
                return -ENOTSUPP;
 
-       if (sig->nr_mpi != pk->algo->n_sig_mpi) {
+       if (sig->nr_mpi != algo->n_sig_mpi) {
                pr_debug("Signature has %u MPI not %u\n",
-                        sig->nr_mpi, pk->algo->n_sig_mpi);
+                        sig->nr_mpi, algo->n_sig_mpi);
                return -EINVAL;
        }
 
-       return pk->algo->verify_signature(pk, sig);
+       return algo->verify_signature(pk, sig);
+}
+EXPORT_SYMBOL_GPL(public_key_verify_signature);
+
+static int public_key_verify_signature_2(const struct key *key,
+                                        const struct public_key_signature *sig)
+{
+       const struct public_key *pk = key->payload.data;
+       return public_key_verify_signature(pk, sig);
 }
 
 /*
@@ -103,6 +135,6 @@ struct asymmetric_key_subtype public_key_subtype = {
        .name                   = "public_key",
        .describe               = public_key_describe,
        .destroy                = public_key_destroy,
-       .verify_signature       = public_key_verify_signature,
+       .verify_signature       = public_key_verify_signature_2,
 };
 EXPORT_SYMBOL_GPL(public_key_subtype);
index 5e5e35626899e845bb673530854f9aa127336fd6..5c37a22a0637acdc7abf8f07c610c2f72dfd59bb 100644 (file)
@@ -28,3 +28,9 @@ struct public_key_algorithm {
 };
 
 extern const struct public_key_algorithm RSA_public_key_algorithm;
+
+/*
+ * public_key.c
+ */
+extern int public_key_verify_signature(const struct public_key *pk,
+                                      const struct public_key_signature *sig);
index facbf26bc6bbbc91eb879b0a5cdf40d01e8f2b05..144201ccba0c19ce7f85f74671206595b6d7deb6 100644 (file)
@@ -47,6 +47,8 @@ void x509_free_certificate(struct x509_certificate *cert)
                kfree(cert->subject);
                kfree(cert->fingerprint);
                kfree(cert->authority);
+               kfree(cert->sig.digest);
+               mpi_free(cert->sig.rsa.s);
                kfree(cert);
        }
 }
@@ -152,33 +154,33 @@ int x509_note_pkey_algo(void *context, size_t hdrlen,
                return -ENOPKG; /* Unsupported combination */
 
        case OID_md4WithRSAEncryption:
-               ctx->cert->sig_hash_algo = PKEY_HASH_MD5;
-               ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+               ctx->cert->sig.pkey_hash_algo = PKEY_HASH_MD5;
+               ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
                break;
 
        case OID_sha1WithRSAEncryption:
-               ctx->cert->sig_hash_algo = PKEY_HASH_SHA1;
-               ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+               ctx->cert->sig.pkey_hash_algo = PKEY_HASH_SHA1;
+               ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
                break;
 
        case OID_sha256WithRSAEncryption:
-               ctx->cert->sig_hash_algo = PKEY_HASH_SHA256;
-               ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+               ctx->cert->sig.pkey_hash_algo = PKEY_HASH_SHA256;
+               ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
                break;
 
        case OID_sha384WithRSAEncryption:
-               ctx->cert->sig_hash_algo = PKEY_HASH_SHA384;
-               ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+               ctx->cert->sig.pkey_hash_algo = PKEY_HASH_SHA384;
+               ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
                break;
 
        case OID_sha512WithRSAEncryption:
-               ctx->cert->sig_hash_algo = PKEY_HASH_SHA512;
-               ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+               ctx->cert->sig.pkey_hash_algo = PKEY_HASH_SHA512;
+               ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
                break;
 
        case OID_sha224WithRSAEncryption:
-               ctx->cert->sig_hash_algo = PKEY_HASH_SHA224;
-               ctx->cert->sig_pkey_algo = PKEY_ALGO_RSA;
+               ctx->cert->sig.pkey_hash_algo = PKEY_HASH_SHA224;
+               ctx->cert->sig.pkey_algo = PKEY_ALGO_RSA;
                break;
        }
 
@@ -203,8 +205,8 @@ int x509_note_signature(void *context, size_t hdrlen,
                return -EINVAL;
        }
 
-       ctx->cert->sig = value;
-       ctx->cert->sig_size = vlen;
+       ctx->cert->raw_sig = value;
+       ctx->cert->raw_sig_size = vlen;
        return 0;
 }
 
@@ -343,8 +345,9 @@ int x509_extract_key_data(void *context, size_t hdrlen,
        if (ctx->last_oid != OID_rsaEncryption)
                return -ENOPKG;
 
-       /* There seems to be an extraneous 0 byte on the front of the data */
-       ctx->cert->pkey_algo = PKEY_ALGO_RSA;
+       ctx->cert->pub->pkey_algo = PKEY_ALGO_RSA;
+
+       /* Discard the BIT STRING metadata */
        ctx->key = value + 1;
        ctx->key_size = vlen - 1;
        return 0;
index f86dc5fcc4ad46accfb003b464c784fb5763218b..87d9cc26f630625d7c57e3309456de2a356a46b5 100644 (file)
@@ -9,6 +9,7 @@
  * 2 of the Licence, or (at your option) any later version.
  */
 
+#include <linux/time.h>
 #include <crypto/public_key.h>
 
 struct x509_certificate {
@@ -20,13 +21,11 @@ struct x509_certificate {
        char            *authority;             /* Authority key fingerprint as hex */
        struct tm       valid_from;
        struct tm       valid_to;
-       enum pkey_algo  pkey_algo : 8;          /* Public key algorithm */
-       enum pkey_algo  sig_pkey_algo : 8;      /* Signature public key algorithm */
-       enum pkey_hash_algo sig_hash_algo : 8;  /* Signature hash algorithm */
        const void      *tbs;                   /* Signed data */
-       size_t          tbs_size;               /* Size of signed data */
-       const void      *sig;                   /* Signature data */
-       size_t          sig_size;               /* Size of sigature */
+       unsigned        tbs_size;               /* Size of signed data */
+       unsigned        raw_sig_size;           /* Size of signature */
+       const void      *raw_sig;               /* Signature data */
+       struct public_key_signature sig;        /* Signature parameters */
 };
 
 /*
@@ -34,3 +33,10 @@ struct x509_certificate {
  */
 extern void x509_free_certificate(struct x509_certificate *cert);
 extern struct x509_certificate *x509_cert_parse(const void *data, size_t datalen);
+
+/*
+ * x509_public_key.c
+ */
+extern int x509_get_sig_params(struct x509_certificate *cert);
+extern int x509_check_signature(const struct public_key *pub,
+                               struct x509_certificate *cert);
index 06007f0e880c330903b5536e9d9c194da302738c..6abc27f2e8a51b0a0d382a6938d818447a8bdab6 100644 (file)
 #include <linux/asn1_decoder.h>
 #include <keys/asymmetric-subtype.h>
 #include <keys/asymmetric-parser.h>
+#include <keys/system_keyring.h>
 #include <crypto/hash.h>
 #include "asymmetric_keys.h"
 #include "public_key.h"
 #include "x509_parser.h"
 
-static const
-struct public_key_algorithm *x509_public_key_algorithms[PKEY_ALGO__LAST] = {
-       [PKEY_ALGO_DSA]         = NULL,
-#if defined(CONFIG_PUBLIC_KEY_ALGO_RSA) || \
-       defined(CONFIG_PUBLIC_KEY_ALGO_RSA_MODULE)
-       [PKEY_ALGO_RSA]         = &RSA_public_key_algorithm,
-#endif
-};
+/*
+ * Find a key in the given keyring by issuer and authority.
+ */
+static struct key *x509_request_asymmetric_key(
+       struct key *keyring,
+       const char *signer, size_t signer_len,
+       const char *authority, size_t auth_len)
+{
+       key_ref_t key;
+       char *id;
+
+       /* Construct an identifier. */
+       id = kmalloc(signer_len + 2 + auth_len + 1, GFP_KERNEL);
+       if (!id)
+               return ERR_PTR(-ENOMEM);
+
+       memcpy(id, signer, signer_len);
+       id[signer_len + 0] = ':';
+       id[signer_len + 1] = ' ';
+       memcpy(id + signer_len + 2, authority, auth_len);
+       id[signer_len + 2 + auth_len] = 0;
+
+       pr_debug("Look up: \"%s\"\n", id);
+
+       key = keyring_search(make_key_ref(keyring, 1),
+                            &key_type_asymmetric, id);
+       if (IS_ERR(key))
+               pr_debug("Request for module key '%s' err %ld\n",
+                        id, PTR_ERR(key));
+       kfree(id);
+
+       if (IS_ERR(key)) {
+               switch (PTR_ERR(key)) {
+                       /* Hide some search errors */
+               case -EACCES:
+               case -ENOTDIR:
+               case -EAGAIN:
+                       return ERR_PTR(-ENOKEY);
+               default:
+                       return ERR_CAST(key);
+               }
+       }
+
+       pr_devel("<==%s() = 0 [%x]\n", __func__, key_serial(key_ref_to_ptr(key)));
+       return key_ref_to_ptr(key);
+}
 
 /*
- * Check the signature on a certificate using the provided public key
+ * Set up the signature parameters in an X.509 certificate.  This involves
+ * digesting the signed data and extracting the signature.
  */
-static int x509_check_signature(const struct public_key *pub,
-                               const struct x509_certificate *cert)
+int x509_get_sig_params(struct x509_certificate *cert)
 {
-       struct public_key_signature *sig;
        struct crypto_shash *tfm;
        struct shash_desc *desc;
        size_t digest_size, desc_size;
+       void *digest;
        int ret;
 
        pr_devel("==>%s()\n", __func__);
-       
+
+       if (cert->sig.rsa.s)
+               return 0;
+
+       cert->sig.rsa.s = mpi_read_raw_data(cert->raw_sig, cert->raw_sig_size);
+       if (!cert->sig.rsa.s)
+               return -ENOMEM;
+       cert->sig.nr_mpi = 1;
+
        /* Allocate the hashing algorithm we're going to need and find out how
         * big the hash operational data will be.
         */
-       tfm = crypto_alloc_shash(pkey_hash_algo[cert->sig_hash_algo], 0, 0);
+       tfm = crypto_alloc_shash(pkey_hash_algo_name[cert->sig.pkey_hash_algo], 0, 0);
        if (IS_ERR(tfm))
                return (PTR_ERR(tfm) == -ENOENT) ? -ENOPKG : PTR_ERR(tfm);
 
        desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
        digest_size = crypto_shash_digestsize(tfm);
 
-       /* We allocate the hash operational data storage on the end of our
-        * context data.
+       /* We allocate the hash operational data storage on the end of the
+        * digest storage space.
         */
        ret = -ENOMEM;
-       sig = kzalloc(sizeof(*sig) + desc_size + digest_size, GFP_KERNEL);
-       if (!sig)
-               goto error_no_sig;
+       digest = kzalloc(digest_size + desc_size, GFP_KERNEL);
+       if (!digest)
+               goto error;
 
-       sig->pkey_hash_algo     = cert->sig_hash_algo;
-       sig->digest             = (u8 *)sig + sizeof(*sig) + desc_size;
-       sig->digest_size        = digest_size;
+       cert->sig.digest = digest;
+       cert->sig.digest_size = digest_size;
 
-       desc = (void *)sig + sizeof(*sig);
-       desc->tfm       = tfm;
-       desc->flags     = CRYPTO_TFM_REQ_MAY_SLEEP;
+       desc = digest + digest_size;
+       desc->tfm = tfm;
+       desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
 
        ret = crypto_shash_init(desc);
        if (ret < 0)
                goto error;
+       might_sleep();
+       ret = crypto_shash_finup(desc, cert->tbs, cert->tbs_size, digest);
+error:
+       crypto_free_shash(tfm);
+       pr_devel("<==%s() = %d\n", __func__, ret);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(x509_get_sig_params);
 
-       ret = -ENOMEM;
-       sig->rsa.s = mpi_read_raw_data(cert->sig, cert->sig_size);
-       if (!sig->rsa.s)
-               goto error;
+/*
+ * Check the signature on a certificate using the provided public key
+ */
+int x509_check_signature(const struct public_key *pub,
+                        struct x509_certificate *cert)
+{
+       int ret;
 
-       ret = crypto_shash_finup(desc, cert->tbs, cert->tbs_size, sig->digest);
-       if (ret < 0)
-               goto error_mpi;
+       pr_devel("==>%s()\n", __func__);
 
-       ret = pub->algo->verify_signature(pub, sig);
+       ret = x509_get_sig_params(cert);
+       if (ret < 0)
+               return ret;
 
+       ret = public_key_verify_signature(pub, &cert->sig);
        pr_debug("Cert Verification: %d\n", ret);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(x509_check_signature);
 
-error_mpi:
-       mpi_free(sig->rsa.s);
-error:
-       kfree(sig);
-error_no_sig:
-       crypto_free_shash(tfm);
+/*
+ * Check the new certificate against the ones in the trust keyring.  If one of
+ * those is the signing key and validates the new certificate, then mark the
+ * new certificate as being trusted.
+ *
+ * Return 0 if the new certificate was successfully validated, 1 if we couldn't
+ * find a matching parent certificate in the trusted list and an error if there
+ * is a matching certificate but the signature check fails.
+ */
+static int x509_validate_trust(struct x509_certificate *cert,
+                              struct key *trust_keyring)
+{
+       const struct public_key *pk;
+       struct key *key;
+       int ret = 1;
 
-       pr_devel("<==%s() = %d\n", __func__, ret);
+       key = x509_request_asymmetric_key(trust_keyring,
+                                         cert->issuer, strlen(cert->issuer),
+                                         cert->authority,
+                                         strlen(cert->authority));
+       if (!IS_ERR(key))  {
+               pk = key->payload.data;
+               ret = x509_check_signature(pk, cert);
+       }
        return ret;
 }
 
@@ -106,7 +183,6 @@ error_no_sig:
 static int x509_key_preparse(struct key_preparsed_payload *prep)
 {
        struct x509_certificate *cert;
-       struct tm now;
        size_t srlen, sulen;
        char *desc = NULL;
        int ret;
@@ -117,7 +193,18 @@ static int x509_key_preparse(struct key_preparsed_payload *prep)
 
        pr_devel("Cert Issuer: %s\n", cert->issuer);
        pr_devel("Cert Subject: %s\n", cert->subject);
-       pr_devel("Cert Key Algo: %s\n", pkey_algo[cert->pkey_algo]);
+
+       if (cert->pub->pkey_algo >= PKEY_ALGO__LAST ||
+           cert->sig.pkey_algo >= PKEY_ALGO__LAST ||
+           cert->sig.pkey_hash_algo >= PKEY_HASH__LAST ||
+           !pkey_algo[cert->pub->pkey_algo] ||
+           !pkey_algo[cert->sig.pkey_algo] ||
+           !pkey_hash_algo_name[cert->sig.pkey_hash_algo]) {
+               ret = -ENOPKG;
+               goto error_free_cert;
+       }
+
+       pr_devel("Cert Key Algo: %s\n", pkey_algo_name[cert->pub->pkey_algo]);
        pr_devel("Cert Valid From: %04ld-%02d-%02d %02d:%02d:%02d\n",
                 cert->valid_from.tm_year + 1900, cert->valid_from.tm_mon + 1,
                 cert->valid_from.tm_mday, cert->valid_from.tm_hour,
@@ -127,61 +214,29 @@ static int x509_key_preparse(struct key_preparsed_payload *prep)
                 cert->valid_to.tm_mday, cert->valid_to.tm_hour,
                 cert->valid_to.tm_min,  cert->valid_to.tm_sec);
        pr_devel("Cert Signature: %s + %s\n",
-                pkey_algo[cert->sig_pkey_algo],
-                pkey_hash_algo[cert->sig_hash_algo]);
+                pkey_algo_name[cert->sig.pkey_algo],
+                pkey_hash_algo_name[cert->sig.pkey_hash_algo]);
 
-       if (!cert->fingerprint || !cert->authority) {
-               pr_warn("Cert for '%s' must have SubjKeyId and AuthKeyId extensions\n",
+       if (!cert->fingerprint) {
+               pr_warn("Cert for '%s' must have a SubjKeyId extension\n",
                        cert->subject);
                ret = -EKEYREJECTED;
                goto error_free_cert;
        }
 
-       time_to_tm(CURRENT_TIME.tv_sec, 0, &now);
-       pr_devel("Now: %04ld-%02d-%02d %02d:%02d:%02d\n",
-                now.tm_year + 1900, now.tm_mon + 1, now.tm_mday,
-                now.tm_hour, now.tm_min,  now.tm_sec);
-       if (now.tm_year < cert->valid_from.tm_year ||
-           (now.tm_year == cert->valid_from.tm_year &&
-            (now.tm_mon < cert->valid_from.tm_mon ||
-             (now.tm_mon == cert->valid_from.tm_mon &&
-              (now.tm_mday < cert->valid_from.tm_mday ||
-               (now.tm_mday == cert->valid_from.tm_mday &&
-                (now.tm_hour < cert->valid_from.tm_hour ||
-                 (now.tm_hour == cert->valid_from.tm_hour &&
-                  (now.tm_min < cert->valid_from.tm_min ||
-                   (now.tm_min == cert->valid_from.tm_min &&
-                    (now.tm_sec < cert->valid_from.tm_sec
-                     ))))))))))) {
-               pr_warn("Cert %s is not yet valid\n", cert->fingerprint);
-               ret = -EKEYREJECTED;
-               goto error_free_cert;
-       }
-       if (now.tm_year > cert->valid_to.tm_year ||
-           (now.tm_year == cert->valid_to.tm_year &&
-            (now.tm_mon > cert->valid_to.tm_mon ||
-             (now.tm_mon == cert->valid_to.tm_mon &&
-              (now.tm_mday > cert->valid_to.tm_mday ||
-               (now.tm_mday == cert->valid_to.tm_mday &&
-                (now.tm_hour > cert->valid_to.tm_hour ||
-                 (now.tm_hour == cert->valid_to.tm_hour &&
-                  (now.tm_min > cert->valid_to.tm_min ||
-                   (now.tm_min == cert->valid_to.tm_min &&
-                    (now.tm_sec > cert->valid_to.tm_sec
-                     ))))))))))) {
-               pr_warn("Cert %s has expired\n", cert->fingerprint);
-               ret = -EKEYEXPIRED;
-               goto error_free_cert;
-       }
-
-       cert->pub->algo = x509_public_key_algorithms[cert->pkey_algo];
+       cert->pub->algo = pkey_algo[cert->pub->pkey_algo];
        cert->pub->id_type = PKEY_ID_X509;
 
-       /* Check the signature on the key */
-       if (strcmp(cert->fingerprint, cert->authority) == 0) {
-               ret = x509_check_signature(cert->pub, cert);
+       /* Check the signature on the key if it appears to be self-signed */
+       if (!cert->authority ||
+           strcmp(cert->fingerprint, cert->authority) == 0) {
+               ret = x509_check_signature(cert->pub, cert); /* self-signed */
                if (ret < 0)
                        goto error_free_cert;
+       } else {
+               ret = x509_validate_trust(cert, system_trusted_keyring);
+               if (!ret)
+                       prep->trusted = 1;
        }
 
        /* Propose a description */
@@ -237,3 +292,6 @@ static void __exit x509_key_exit(void)
 
 module_init(x509_key_init);
 module_exit(x509_key_exit);
+
+MODULE_DESCRIPTION("X.509 certificate parser");
+MODULE_LICENSE("GPL");
index 94c0c74434eac641a3ddfcd18324913ecedeaafa..1a65838888cdbb37ec2551fd132c7c91cc66c511 100644 (file)
@@ -33,6 +33,15 @@ config TCG_TIS
          from within Linux.  To compile this driver as a module, choose
          M here; the module will be called tpm_tis.
 
+config TCG_TIS_I2C_ATMEL
+       tristate "TPM Interface Specification 1.2 Interface (I2C - Atmel)"
+       depends on I2C
+       ---help---
+         If you have an Atmel I2C TPM security chip say Yes and it will be
+         accessible from within Linux.
+         To compile this driver as a module, choose M here; the module will
+         be called tpm_i2c_atmel.
+
 config TCG_TIS_I2C_INFINEON
        tristate "TPM Interface Specification 1.2 Interface (I2C - Infineon)"
        depends on I2C
@@ -42,7 +51,17 @@ config TCG_TIS_I2C_INFINEON
          Specification 0.20 say Yes and it will be accessible from within
          Linux.
          To compile this driver as a module, choose M here; the module
-         will be called tpm_tis_i2c_infineon.
+         will be called tpm_i2c_infineon.
+
+config TCG_TIS_I2C_NUVOTON
+       tristate "TPM Interface Specification 1.2 Interface (I2C - Nuvoton)"
+       depends on I2C
+       ---help---
+         If you have a TPM security chip with an I2C interface from
+         Nuvoton Technology Corp. say Yes and it will be accessible
+         from within Linux.
+         To compile this driver as a module, choose M here; the module
+         will be called tpm_i2c_nuvoton.
 
 config TCG_NSC
        tristate "National Semiconductor TPM Interface"
@@ -82,14 +101,14 @@ config TCG_IBMVTPM
          as a module, choose M here; the module will be called tpm_ibmvtpm.
 
 config TCG_ST33_I2C
-        tristate "STMicroelectronics ST33 I2C TPM"
-        depends on I2C
-        depends on GPIOLIB
-        ---help---
-        If you have a TPM security chip from STMicroelectronics working with
-        an I2C bus say Yes and it will be accessible from within Linux.
-        To compile this driver as a module, choose M here; the module will be
-        called tpm_stm_st33_i2c.
+       tristate "STMicroelectronics ST33 I2C TPM"
+       depends on I2C
+       depends on GPIOLIB
+       ---help---
+         If you have a TPM security chip from STMicroelectronics working with
+         an I2C bus say Yes and it will be accessible from within Linux.
+         To compile this driver as a module, choose M here; the module will be
+         called tpm_i2c_stm_st33.
 
 config TCG_XEN
        tristate "XEN TPM Interface"
index eb41ff97d0ad13f577ff942364ffe8314a5a2999..b80a4000daeee7b72f7f945637544312473edeb6 100644 (file)
@@ -2,17 +2,20 @@
 # Makefile for the kernel tpm device drivers.
 #
 obj-$(CONFIG_TCG_TPM) += tpm.o
+tpm-y := tpm-interface.o
+tpm-$(CONFIG_ACPI) += tpm_ppi.o
+
 ifdef CONFIG_ACPI
-       obj-$(CONFIG_TCG_TPM) += tpm_bios.o
-       tpm_bios-objs += tpm_eventlog.o tpm_acpi.o tpm_ppi.o
+       tpm-y += tpm_eventlog.o tpm_acpi.o
 else
 ifdef CONFIG_TCG_IBMVTPM
-       obj-$(CONFIG_TCG_TPM) += tpm_bios.o
-       tpm_bios-objs += tpm_eventlog.o tpm_of.o
+       tpm-y += tpm_eventlog.o tpm_of.o
 endif
 endif
 obj-$(CONFIG_TCG_TIS) += tpm_tis.o
+obj-$(CONFIG_TCG_TIS_I2C_ATMEL) += tpm_i2c_atmel.o
 obj-$(CONFIG_TCG_TIS_I2C_INFINEON) += tpm_i2c_infineon.o
+obj-$(CONFIG_TCG_TIS_I2C_NUVOTON) += tpm_i2c_nuvoton.o
 obj-$(CONFIG_TCG_NSC) += tpm_nsc.o
 obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o
 obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
similarity index 93%
rename from drivers/char/tpm/tpm.c
rename to drivers/char/tpm/tpm-interface.c
index e3c974a6c522028b96c2df72ab1eb053945b1f43..6ae41d3376302adee51c8be4599e4891bffedf16 100644 (file)
  * Maintained by: <tpmdd-devel@lists.sourceforge.net>
  *
  * Device driver for TCG/TCPA TPM (trusted platform module).
- * Specifications at www.trustedcomputinggroup.org      
+ * Specifications at www.trustedcomputinggroup.org
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation, version 2 of the
  * License.
- * 
+ *
  * Note, the TPM chip is not interrupt driven (only polling)
  * and can have very long timeouts (minutes!). Hence the unusual
  * calls to msleep.
@@ -371,13 +371,14 @@ static ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
                return -ENODATA;
        if (count > bufsiz) {
                dev_err(chip->dev,
-                       "invalid count value %x %zx \n", count, bufsiz);
+                       "invalid count value %x %zx\n", count, bufsiz);
                return -E2BIG;
        }
 
        mutex_lock(&chip->tpm_mutex);
 
-       if ((rc = chip->vendor.send(chip, (u8 *) buf, count)) < 0) {
+       rc = chip->vendor.send(chip, (u8 *) buf, count);
+       if (rc < 0) {
                dev_err(chip->dev,
                        "tpm_transmit: tpm_send: error %zd\n", rc);
                goto out;
@@ -444,7 +445,7 @@ static ssize_t transmit_cmd(struct tpm_chip *chip, struct tpm_cmd_t *cmd,
 {
        int err;
 
-       len = tpm_transmit(chip,(u8 *) cmd, len);
+       len = tpm_transmit(chip, (u8 *) cmd, len);
        if (len <  0)
                return len;
        else if (len < TPM_HEADER_SIZE)
@@ -658,7 +659,7 @@ static int tpm_continue_selftest(struct tpm_chip *chip)
        return rc;
 }
 
-ssize_t tpm_show_enabled(struct device * dev, struct device_attribute * attr,
+ssize_t tpm_show_enabled(struct device *dev, struct device_attribute *attr,
                        char *buf)
 {
        cap_t cap;
@@ -674,7 +675,7 @@ ssize_t tpm_show_enabled(struct device * dev, struct device_attribute * attr,
 }
 EXPORT_SYMBOL_GPL(tpm_show_enabled);
 
-ssize_t tpm_show_active(struct device * dev, struct device_attribute * attr,
+ssize_t tpm_show_active(struct device *dev, struct device_attribute *attr,
                        char *buf)
 {
        cap_t cap;
@@ -690,7 +691,7 @@ ssize_t tpm_show_active(struct device * dev, struct device_attribute * attr,
 }
 EXPORT_SYMBOL_GPL(tpm_show_active);
 
-ssize_t tpm_show_owned(struct device * dev, struct device_attribute * attr,
+ssize_t tpm_show_owned(struct device *dev, struct device_attribute *attr,
                        char *buf)
 {
        cap_t cap;
@@ -706,8 +707,8 @@ ssize_t tpm_show_owned(struct device * dev, struct device_attribute * attr,
 }
 EXPORT_SYMBOL_GPL(tpm_show_owned);
 
-ssize_t tpm_show_temp_deactivated(struct device * dev,
-                               struct device_attribute * attr, char *buf)
+ssize_t tpm_show_temp_deactivated(struct device *dev,
+                               struct device_attribute *attr, char *buf)
 {
        cap_t cap;
        ssize_t rc;
@@ -769,10 +770,10 @@ static int __tpm_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf)
 
 /**
  * tpm_pcr_read - read a pcr value
- * @chip_num:  tpm idx # or ANY
+ * @chip_num:  tpm idx # or ANY
  * @pcr_idx:   pcr idx to retrieve
- * @res_buf:   TPM_PCR value
- *             size of res_buf is 20 bytes (or NULL if you don't care)
+ * @res_buf:   TPM_PCR value
+ *             size of res_buf is 20 bytes (or NULL if you don't care)
  *
  * The TPM driver should be built-in, but for whatever reason it
  * isn't, protect against the chip disappearing, by incrementing
@@ -794,9 +795,9 @@ EXPORT_SYMBOL_GPL(tpm_pcr_read);
 
 /**
  * tpm_pcr_extend - extend pcr value with hash
- * @chip_num:  tpm idx # or AN&
+ * @chip_num:  tpm idx # or ANY
  * @pcr_idx:   pcr idx to extend
- * @hash:      hash value used to extend pcr value
+ * @hash:      hash value used to extend pcr value
  *
  * The TPM driver should be built-in, but for whatever reason it
  * isn't, protect against the chip disappearing, by incrementing
@@ -847,8 +848,7 @@ int tpm_do_selftest(struct tpm_chip *chip)
        unsigned long duration;
        struct tpm_cmd_t cmd;
 
-       duration = tpm_calc_ordinal_duration(chip,
-                                            TPM_ORD_CONTINUE_SELFTEST);
+       duration = tpm_calc_ordinal_duration(chip, TPM_ORD_CONTINUE_SELFTEST);
 
        loops = jiffies_to_msecs(duration) / delay_msec;
 
@@ -965,12 +965,12 @@ ssize_t tpm_show_pubek(struct device *dev, struct device_attribute *attr,
        if (err)
                goto out;
 
-       /* 
+       /*
           ignore header 10 bytes
           algorithm 32 bits (1 == RSA )
           encscheme 16 bits
           sigscheme 16 bits
-          parameters (RSA 12->bytes: keybit, #primes, expbit)  
+          parameters (RSA 12->bytes: keybit, #primes, expbit)
           keylenbytes 32 bits
           256 byte modulus
           ignore checksum 20 bytes
@@ -1020,43 +1020,33 @@ ssize_t tpm_show_caps(struct device *dev, struct device_attribute *attr,
        str += sprintf(str, "Manufacturer: 0x%x\n",
                       be32_to_cpu(cap.manufacturer_id));
 
-       rc = tpm_getcap(dev, CAP_VERSION_1_1, &cap,
-                       "attempting to determine the 1.1 version");
-       if (rc)
-               return 0;
-       str += sprintf(str,
-                      "TCG version: %d.%d\nFirmware version: %d.%d\n",
-                      cap.tpm_version.Major, cap.tpm_version.Minor,
-                      cap.tpm_version.revMajor, cap.tpm_version.revMinor);
-       return str - buf;
-}
-EXPORT_SYMBOL_GPL(tpm_show_caps);
-
-ssize_t tpm_show_caps_1_2(struct device * dev,
-                         struct device_attribute * attr, char *buf)
-{
-       cap_t cap;
-       ssize_t rc;
-       char *str = buf;
-
-       rc = tpm_getcap(dev, TPM_CAP_PROP_MANUFACTURER, &cap,
-                       "attempting to determine the manufacturer");
-       if (rc)
-               return 0;
-       str += sprintf(str, "Manufacturer: 0x%x\n",
-                      be32_to_cpu(cap.manufacturer_id));
+       /* Try to get a TPM version 1.2 TPM_CAP_VERSION_INFO */
        rc = tpm_getcap(dev, CAP_VERSION_1_2, &cap,
                         "attempting to determine the 1.2 version");
-       if (rc)
-               return 0;
-       str += sprintf(str,
-                      "TCG version: %d.%d\nFirmware version: %d.%d\n",
-                      cap.tpm_version_1_2.Major, cap.tpm_version_1_2.Minor,
-                      cap.tpm_version_1_2.revMajor,
-                      cap.tpm_version_1_2.revMinor);
+       if (!rc) {
+               str += sprintf(str,
+                              "TCG version: %d.%d\nFirmware version: %d.%d\n",
+                              cap.tpm_version_1_2.Major,
+                              cap.tpm_version_1_2.Minor,
+                              cap.tpm_version_1_2.revMajor,
+                              cap.tpm_version_1_2.revMinor);
+       } else {
+               /* Otherwise just use TPM_STRUCT_VER */
+               rc = tpm_getcap(dev, CAP_VERSION_1_1, &cap,
+                               "attempting to determine the 1.1 version");
+               if (rc)
+                       return 0;
+               str += sprintf(str,
+                              "TCG version: %d.%d\nFirmware version: %d.%d\n",
+                              cap.tpm_version.Major,
+                              cap.tpm_version.Minor,
+                              cap.tpm_version.revMajor,
+                              cap.tpm_version.revMinor);
+       }
+
        return str - buf;
 }
-EXPORT_SYMBOL_GPL(tpm_show_caps_1_2);
+EXPORT_SYMBOL_GPL(tpm_show_caps);
 
 ssize_t tpm_show_durations(struct device *dev, struct device_attribute *attr,
                          char *buf)
@@ -1102,8 +1092,8 @@ ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr,
 }
 EXPORT_SYMBOL_GPL(tpm_store_cancel);
 
-static bool wait_for_tpm_stat_cond(struct tpm_chip *chip, u8 mask, bool check_cancel,
-                                  bool *canceled)
+static bool wait_for_tpm_stat_cond(struct tpm_chip *chip, u8 mask,
+                                       bool check_cancel, bool *canceled)
 {
        u8 status = chip->vendor.status(chip);
 
@@ -1170,38 +1160,25 @@ EXPORT_SYMBOL_GPL(wait_for_tpm_stat);
  */
 int tpm_open(struct inode *inode, struct file *file)
 {
-       int minor = iminor(inode);
-       struct tpm_chip *chip = NULL, *pos;
-
-       rcu_read_lock();
-       list_for_each_entry_rcu(pos, &tpm_chip_list, list) {
-               if (pos->vendor.miscdev.minor == minor) {
-                       chip = pos;
-                       get_device(chip->dev);
-                       break;
-               }
-       }
-       rcu_read_unlock();
-
-       if (!chip)
-               return -ENODEV;
+       struct miscdevice *misc = file->private_data;
+       struct tpm_chip *chip = container_of(misc, struct tpm_chip,
+                                            vendor.miscdev);
 
        if (test_and_set_bit(0, &chip->is_open)) {
                dev_dbg(chip->dev, "Another process owns this TPM\n");
-               put_device(chip->dev);
                return -EBUSY;
        }
 
        chip->data_buffer = kzalloc(TPM_BUFSIZE, GFP_KERNEL);
        if (chip->data_buffer == NULL) {
                clear_bit(0, &chip->is_open);
-               put_device(chip->dev);
                return -ENOMEM;
        }
 
        atomic_set(&chip->data_pending, 0);
 
        file->private_data = chip;
+       get_device(chip->dev);
        return 0;
 }
 EXPORT_SYMBOL_GPL(tpm_open);
@@ -1463,7 +1440,6 @@ void tpm_dev_vendor_release(struct tpm_chip *chip)
                chip->vendor.release(chip->dev);
 
        clear_bit(chip->dev_num, dev_mask);
-       kfree(chip->vendor.miscdev.name);
 }
 EXPORT_SYMBOL_GPL(tpm_dev_vendor_release);
 
@@ -1487,7 +1463,7 @@ void tpm_dev_release(struct device *dev)
 EXPORT_SYMBOL_GPL(tpm_dev_release);
 
 /*
- * Called from tpm_<specific>.c probe function only for devices 
+ * Called from tpm_<specific>.c probe function only for devices
  * the driver has determined it should claim.  Prior to calling
  * this function the specific probe function has called pci_enable_device
  * upon errant exit from this function specific probe function should call
@@ -1496,17 +1472,13 @@ EXPORT_SYMBOL_GPL(tpm_dev_release);
 struct tpm_chip *tpm_register_hardware(struct device *dev,
                                        const struct tpm_vendor_specific *entry)
 {
-#define DEVNAME_SIZE 7
-
-       char *devname;
        struct tpm_chip *chip;
 
        /* Driver specific per-device data */
        chip = kzalloc(sizeof(*chip), GFP_KERNEL);
-       devname = kmalloc(DEVNAME_SIZE, GFP_KERNEL);
 
-       if (chip == NULL || devname == NULL)
-               goto out_free;
+       if (chip == NULL)
+               return NULL;
 
        mutex_init(&chip->buffer_mutex);
        mutex_init(&chip->tpm_mutex);
@@ -1531,8 +1503,9 @@ struct tpm_chip *tpm_register_hardware(struct device *dev,
 
        set_bit(chip->dev_num, dev_mask);
 
-       scnprintf(devname, DEVNAME_SIZE, "%s%d", "tpm", chip->dev_num);
-       chip->vendor.miscdev.name = devname;
+       scnprintf(chip->devname, sizeof(chip->devname), "%s%d", "tpm",
+                 chip->dev_num);
+       chip->vendor.miscdev.name = chip->devname;
 
        chip->vendor.miscdev.parent = dev;
        chip->dev = get_device(dev);
@@ -1558,7 +1531,7 @@ struct tpm_chip *tpm_register_hardware(struct device *dev,
                goto put_device;
        }
 
-       chip->bios_dir = tpm_bios_log_setup(devname);
+       chip->bios_dir = tpm_bios_log_setup(chip->devname);
 
        /* Make chip available */
        spin_lock(&driver_lock);
@@ -1571,7 +1544,6 @@ put_device:
        put_device(chip->dev);
 out_free:
        kfree(chip);
-       kfree(devname);
        return NULL;
 }
 EXPORT_SYMBOL_GPL(tpm_register_hardware);
index a7bfc176ed4316bc8319220f1b73848bc9d74f06..f32847872193ad7acdf67bef8921c9f48d474854 100644 (file)
@@ -59,8 +59,6 @@ extern ssize_t tpm_show_pcrs(struct device *, struct device_attribute *attr,
                                char *);
 extern ssize_t tpm_show_caps(struct device *, struct device_attribute *attr,
                                char *);
-extern ssize_t tpm_show_caps_1_2(struct device *, struct device_attribute *attr,
-                               char *);
 extern ssize_t tpm_store_cancel(struct device *, struct device_attribute *attr,
                                const char *, size_t);
 extern ssize_t tpm_show_enabled(struct device *, struct device_attribute *attr,
@@ -122,6 +120,7 @@ struct tpm_chip {
        struct device *dev;     /* Device stuff */
 
        int dev_num;            /* /dev/tpm# */
+       char devname[7];
        unsigned long is_open;  /* only one allowed */
        int time_expired;
 
index 99d6820c611db2e0f0f344c9cf701cfc02b0505d..c9a528d25d22001141b5809fa255fbc3d1d42154 100644 (file)
@@ -202,7 +202,7 @@ static int __init init_atmel(void)
 
        have_region =
            (atmel_request_region
-            (tpm_atmel.base, region_size, "tpm_atmel0") == NULL) ? 0 : 1;
+            (base, region_size, "tpm_atmel0") == NULL) ? 0 : 1;
 
        pdev = platform_device_register_simple("tpm_atmel", -1, NULL, 0);
        if (IS_ERR(pdev)) {
index 84ddc557b8f8689cd1af8e04ed825e42283fd442..59f7cb28260b4f4c23af010f7615befdd6ed7d23 100644 (file)
@@ -406,7 +406,6 @@ out_tpm:
 out:
        return NULL;
 }
-EXPORT_SYMBOL_GPL(tpm_bios_log_setup);
 
 void tpm_bios_log_teardown(struct dentry **lst)
 {
@@ -415,5 +414,3 @@ void tpm_bios_log_teardown(struct dentry **lst)
        for (i = 0; i < 3; i++)
                securityfs_remove(lst[i]);
 }
-EXPORT_SYMBOL_GPL(tpm_bios_log_teardown);
-MODULE_LICENSE("GPL");
diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c
new file mode 100644 (file)
index 0000000..c3cd7fe
--- /dev/null
@@ -0,0 +1,284 @@
+/*
+ * ATMEL I2C TPM AT97SC3204T
+ *
+ * Copyright (C) 2012 V Lab Technologies
+ *  Teddy Reed <teddy@prosauce.org>
+ * Copyright (C) 2013, Obsidian Research Corp.
+ *  Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
+ * Device driver for ATMEL I2C TPMs.
+ *
+ * Teddy Reed determined the basic I2C command flow, unlike other I2C TPM
+ * devices the raw TCG formatted TPM command data is written via I2C and then
+ * raw TCG formatted TPM response data is returned via I2C.
+ *
+ * TCG status/locality/etc functions seen in the LPC implementation do not
+ * seem to be present.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include "tpm.h"
+
+#define I2C_DRIVER_NAME "tpm_i2c_atmel"
+
+#define TPM_I2C_SHORT_TIMEOUT  750     /* ms */
+#define TPM_I2C_LONG_TIMEOUT   2000    /* 2 sec */
+
+#define ATMEL_STS_OK 1
+
+struct priv_data {
+       size_t len;
+       /* This is the amount we read on the first try. 25 was chosen to fit a
+        * fair number of read responses in the buffer so a 2nd retry can be
+        * avoided in small message cases. */
+       u8 buffer[sizeof(struct tpm_output_header) + 25];
+};
+
+static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len)
+{
+       struct priv_data *priv = chip->vendor.priv;
+       struct i2c_client *client = to_i2c_client(chip->dev);
+       s32 status;
+
+       priv->len = 0;
+
+       if (len <= 2)
+               return -EIO;
+
+       status = i2c_master_send(client, buf, len);
+
+       dev_dbg(chip->dev,
+               "%s(buf=%*ph len=%0zx) -> sts=%d\n", __func__,
+               (int)min_t(size_t, 64, len), buf, len, status);
+       return status;
+}
+
+/* Copy out the response buffered by read_status, re-reading if truncated */
+static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count)
+{
+       struct priv_data *priv = chip->vendor.priv;
+       struct i2c_client *client = to_i2c_client(chip->dev);
+       struct tpm_output_header *hdr = (void *)priv->buffer;
+       u32 expected_len;
+       int rc;
+
+       if (priv->len == 0)
+               return -EIO;
+
+       /* The header gives the full message size; if read_status did not
+        * capture all of it the message has to be re-read. */
+       expected_len = be32_to_cpu(hdr->length);
+       if (expected_len > count)
+               return -ENOMEM;
+
+       if (priv->len >= expected_len) {
+               /* fill buf first so the debug dump shows the real response */
+               memcpy(buf, priv->buffer, expected_len);
+               dev_dbg(chip->dev,
+                       "%s early(buf=%*ph count=%0zx) -> ret=%d\n", __func__,
+                       (int)min_t(size_t, 64, expected_len), buf, count,
+                       expected_len);
+               return expected_len;
+       }
+
+       rc = i2c_master_recv(client, buf, expected_len);
+       dev_dbg(chip->dev,
+               "%s reread(buf=%*ph count=%0zx) -> ret=%d\n", __func__,
+               (int)min_t(size_t, 64, expected_len), buf, count,
+               rc);
+       return rc;
+}
+
+static void i2c_atmel_cancel(struct tpm_chip *chip)
+{
+       dev_err(chip->dev, "TPM operation cancellation was requested, but is not supported\n");
+}
+
+/* Returns ATMEL_STS_OK once a response was read and buffered, 0 otherwise */
+static u8 i2c_atmel_read_status(struct tpm_chip *chip)
+{
+       struct priv_data *priv = chip->vendor.priv;
+       struct i2c_client *client = to_i2c_client(chip->dev);
+       int rc;
+
+       /* The TPM fails the I2C read until it is ready, so we do the entire
+        * transfer here and buffer it locally. This way the common code can
+        * properly handle the timeouts. */
+       priv->len = 0;
+       memset(priv->buffer, 0, sizeof(priv->buffer));
+
+       /* Once the TPM has completed the command the command remains readable
+        * until another command is issued. */
+       rc = i2c_master_recv(client, priv->buffer, sizeof(priv->buffer));
+       dev_dbg(chip->dev,
+               "%s: sts=%d\n", __func__, rc);
+       if (rc <= 0)
+               return 0;
+
+       priv->len = rc;
+
+       return ATMEL_STS_OK;
+}
+
+static const struct file_operations i2c_atmel_ops = {
+       .owner = THIS_MODULE,
+       .llseek = no_llseek,
+       .open = tpm_open,
+       .read = tpm_read,
+       .write = tpm_write,
+       .release = tpm_release,
+};
+
+static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
+static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
+static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
+static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
+static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
+static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
+static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
+static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
+static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
+
+static struct attribute *i2c_atmel_attrs[] = {
+       &dev_attr_pubek.attr,
+       &dev_attr_pcrs.attr,
+       &dev_attr_enabled.attr,
+       &dev_attr_active.attr,
+       &dev_attr_owned.attr,
+       &dev_attr_temp_deactivated.attr,
+       &dev_attr_caps.attr,
+       &dev_attr_cancel.attr,
+       &dev_attr_durations.attr,
+       &dev_attr_timeouts.attr,
+       NULL,
+};
+
+static struct attribute_group i2c_atmel_attr_grp = {
+       .attrs = i2c_atmel_attrs
+};
+
+static bool i2c_atmel_req_canceled(struct tpm_chip *chip, u8 status)
+{
+       return 0;
+}
+
+static const struct tpm_vendor_specific i2c_atmel = {
+       .status = i2c_atmel_read_status,
+       .recv = i2c_atmel_recv,
+       .send = i2c_atmel_send,
+       .cancel = i2c_atmel_cancel,
+       .req_complete_mask = ATMEL_STS_OK,
+       .req_complete_val = ATMEL_STS_OK,
+       .req_canceled = i2c_atmel_req_canceled,
+       .attr_group = &i2c_atmel_attr_grp,
+       .miscdev.fops = &i2c_atmel_ops,
+};
+
+static int i2c_atmel_probe(struct i2c_client *client,
+                          const struct i2c_device_id *id)
+{
+       int rc;
+       struct tpm_chip *chip;
+       struct device *dev = &client->dev;
+
+       if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+               return -ENODEV;
+
+       chip = tpm_register_hardware(dev, &i2c_atmel);
+       if (!chip) {
+               dev_err(dev, "%s() error in tpm_register_hardware\n", __func__);
+               return -ENODEV;
+       }
+
+       /* send/recv/status all dereference priv, so fail if it is missing */
+       rc = -ENOMEM;
+       chip->vendor.priv = devm_kzalloc(dev, sizeof(struct priv_data),
+                                        GFP_KERNEL);
+       if (!chip->vendor.priv)
+               goto out_err;
+
+       /* Default timeouts */
+       chip->vendor.timeout_a = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+       chip->vendor.timeout_b = msecs_to_jiffies(TPM_I2C_LONG_TIMEOUT);
+       chip->vendor.timeout_c = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+       chip->vendor.timeout_d = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+       chip->vendor.irq = 0;
+
+       /* There is no known way to probe for this device, and all version
+        * information seems to be read via TPM commands. Thus we rely on the
+        * TPM startup process in the common code to detect the device. */
+       rc = -ENODEV;
+       if (tpm_get_timeouts(chip))
+               goto out_err;
+       if (tpm_do_selftest(chip))
+               goto out_err;
+
+       return 0;
+
+out_err:
+       tpm_dev_vendor_release(chip);
+       tpm_remove_hardware(chip->dev);
+       return rc;
+}
+
+static int i2c_atmel_remove(struct i2c_client *client)
+{
+       struct device *dev = &(client->dev);
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       if (chip)
+               tpm_dev_vendor_release(chip);
+       tpm_remove_hardware(dev);
+       kfree(chip);
+       return 0;
+}
+
+static const struct i2c_device_id i2c_atmel_id[] = {
+       {I2C_DRIVER_NAME, 0},
+       {}
+};
+MODULE_DEVICE_TABLE(i2c, i2c_atmel_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id i2c_atmel_of_match[] = {
+       {.compatible = "atmel,at97sc3204t"},
+       {},
+};
+MODULE_DEVICE_TABLE(of, i2c_atmel_of_match);
+#endif
+
+static SIMPLE_DEV_PM_OPS(i2c_atmel_pm_ops, tpm_pm_suspend, tpm_pm_resume);
+
+static struct i2c_driver i2c_atmel_driver = {
+       .id_table = i2c_atmel_id,
+       .probe = i2c_atmel_probe,
+       .remove = i2c_atmel_remove,
+       .driver = {
+               .name = I2C_DRIVER_NAME,
+               .owner = THIS_MODULE,
+               .pm = &i2c_atmel_pm_ops,
+               .of_match_table = of_match_ptr(i2c_atmel_of_match),
+       },
+};
+
+module_i2c_driver(i2c_atmel_driver);
+
+MODULE_AUTHOR("Jason Gunthorpe <jgunthorpe@obsidianresearch.com>");
+MODULE_DESCRIPTION("Atmel TPM I2C Driver");
+MODULE_LICENSE("GPL");
index b8735de8ce956a3d19fd3ac5b9a5df77c57d3a22..fefd2aa5c81e4aaa6f9069eb26000f9152acface 100644 (file)
@@ -581,7 +581,7 @@ static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
 static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
 static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
 static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL);
-static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
 static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
 static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
 static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
@@ -685,7 +685,6 @@ out_vendor:
        chip->dev->release = NULL;
        chip->release = NULL;
        tpm_dev.client = NULL;
-       dev_set_drvdata(chip->dev, chip);
 out_err:
        return rc;
 }
@@ -766,7 +765,6 @@ static int tpm_tis_i2c_remove(struct i2c_client *client)
        chip->dev->release = NULL;
        chip->release = NULL;
        tpm_dev.client = NULL;
-       dev_set_drvdata(chip->dev, chip);
 
        return 0;
 }
diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c
new file mode 100644 (file)
index 0000000..6276fea
--- /dev/null
@@ -0,0 +1,710 @@
+/******************************************************************************
+ * Nuvoton TPM I2C Device Driver Interface for WPCT301/NPCT501,
+ * based on the TCG TPM Interface Spec version 1.2.
+ * Specifications at www.trustedcomputinggroup.org
+ *
+ * Copyright (C) 2011, Nuvoton Technology Corporation.
+ *  Dan Morav <dan.morav@nuvoton.com>
+ * Copyright (C) 2013, Obsidian Research Corp.
+ *  Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Nuvoton contact information: APC.Support@nuvoton.com
+ *****************************************************************************/
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/wait.h>
+#include <linux/i2c.h>
+#include "tpm.h"
+
+/* I2C interface offsets */
+#define TPM_STS                0x00
+#define TPM_BURST_COUNT        0x01
+#define TPM_DATA_FIFO_W        0x20
+#define TPM_DATA_FIFO_R        0x40
+#define TPM_VID_DID_RID        0x60
+/* TPM command header size */
+#define TPM_HEADER_SIZE        10
+#define TPM_RETRY      5
+/*
+ * I2C bus device maximum buffer size w/o counting I2C address or command
+ * i.e. max size required for I2C write is 34 = addr, command, 32 bytes data
+ */
+#define TPM_I2C_MAX_BUF_SIZE           32
+#define TPM_I2C_RETRY_COUNT            32
+#define TPM_I2C_BUS_DELAY              1       /* msec */
+#define TPM_I2C_RETRY_DELAY_SHORT      2       /* msec */
+#define TPM_I2C_RETRY_DELAY_LONG       10      /* msec */
+
+#define I2C_DRIVER_NAME "tpm_i2c_nuvoton"
+
+struct priv_data {
+       unsigned int intrs;
+};
+
+static s32 i2c_nuvoton_read_buf(struct i2c_client *client, u8 offset, u8 size,
+                               u8 *data)
+{
+       s32 status;
+
+       status = i2c_smbus_read_i2c_block_data(client, offset, size, data);
+       dev_dbg(&client->dev,
+               "%s(offset=%u size=%u data=%*ph) -> sts=%d\n", __func__,
+               offset, size, (int)size, data, status);
+       return status;
+}
+
+static s32 i2c_nuvoton_write_buf(struct i2c_client *client, u8 offset, u8 size,
+                                u8 *data)
+{
+       s32 status;
+
+       status = i2c_smbus_write_i2c_block_data(client, offset, size, data);
+       dev_dbg(&client->dev,
+               "%s(offset=%u size=%u data=%*ph) -> sts=%d\n", __func__,
+               offset, size, (int)size, data, status);
+       return status;
+}
+
+#define TPM_STS_VALID          0x80
+#define TPM_STS_COMMAND_READY  0x40
+#define TPM_STS_GO             0x20
+#define TPM_STS_DATA_AVAIL     0x10
+#define TPM_STS_EXPECT         0x08
+#define TPM_STS_RESPONSE_RETRY 0x02
+#define TPM_STS_ERR_VAL        0x07    /* bit2...bit0 reads always 0 */
+
+#define TPM_I2C_SHORT_TIMEOUT  750     /* ms */
+#define TPM_I2C_LONG_TIMEOUT   2000    /* 2 sec */
+
+/* read TPM_STS register */
+static u8 i2c_nuvoton_read_status(struct tpm_chip *chip)
+{
+       struct i2c_client *client = to_i2c_client(chip->dev);
+       s32 status;
+       u8 data;
+
+       status = i2c_nuvoton_read_buf(client, TPM_STS, 1, &data);
+       if (status <= 0) {
+               dev_err(chip->dev, "%s() error return %d\n", __func__,
+                       status);
+               data = TPM_STS_ERR_VAL;
+       }
+
+       return data;
+}
+
+/* write byte to TPM_STS register */
+static s32 i2c_nuvoton_write_status(struct i2c_client *client, u8 data)
+{
+       s32 status;
+       int i;
+
+       /* this causes the current command to be aborted */
+       for (i = 0, status = -1; i < TPM_I2C_RETRY_COUNT && status < 0; i++) {
+               status = i2c_nuvoton_write_buf(client, TPM_STS, 1, &data);
+               msleep(TPM_I2C_BUS_DELAY);
+       }
+       return status;
+}
+
+/* write commandReady to TPM_STS register */
+static void i2c_nuvoton_ready(struct tpm_chip *chip)
+{
+       struct i2c_client *client = to_i2c_client(chip->dev);
+       s32 status;
+
+       /* this causes the current command to be aborted */
+       status = i2c_nuvoton_write_status(client, TPM_STS_COMMAND_READY);
+       if (status < 0)
+               dev_err(chip->dev,
+                       "%s() fail to write TPM_STS.commandReady\n", __func__);
+}
+
+/* read burstCount field from TPM_STS register
+ * return -1 on fail to read */
+static int i2c_nuvoton_get_burstcount(struct i2c_client *client,
+                                     struct tpm_chip *chip)
+{
+       unsigned long stop = jiffies + chip->vendor.timeout_d;
+       s32 status;
+       int burst_count = -1;
+       u8 data;
+
+       /* wait for burstcount to be non-zero */
+       do {
+               /* in I2C burstCount is 1 byte */
+               status = i2c_nuvoton_read_buf(client, TPM_BURST_COUNT, 1,
+                                             &data);
+               if (status > 0 && data > 0) {
+                       burst_count = min_t(u8, TPM_I2C_MAX_BUF_SIZE, data);
+                       break;
+               }
+               msleep(TPM_I2C_BUS_DELAY);
+       } while (time_before(jiffies, stop));
+
+       return burst_count;
+}
+
+/*
+ * WPCT301/NPCT501 SINT# supports only dataAvail
+ * any call to this function which is not waiting for dataAvail will
+ * set queue to NULL to avoid waiting for interrupt
+ */
+static bool i2c_nuvoton_check_status(struct tpm_chip *chip, u8 mask, u8 value)
+{
+       u8 status = i2c_nuvoton_read_status(chip);
+       return (status != TPM_STS_ERR_VAL) && ((status & mask) == value);
+}
+
+static int i2c_nuvoton_wait_for_stat(struct tpm_chip *chip, u8 mask, u8 value,
+                                    u32 timeout, wait_queue_head_t *queue)
+{
+       if (chip->vendor.irq && queue) {
+               s32 rc;
+               DEFINE_WAIT(wait);
+               struct priv_data *priv = chip->vendor.priv;
+               unsigned int cur_intrs = priv->intrs;
+
+               enable_irq(chip->vendor.irq);
+               rc = wait_event_interruptible_timeout(*queue,
+                                                     cur_intrs != priv->intrs,
+                                                     timeout);
+               if (rc > 0)
+                       return 0;
+               /* At this point we know that the SINT pin is asserted, so we
+                * do not need to do i2c_nuvoton_check_status */
+       } else {
+               unsigned long ten_msec, stop;
+               bool status_valid;
+
+               /* check current status */
+               status_valid = i2c_nuvoton_check_status(chip, mask, value);
+               if (status_valid)
+                       return 0;
+
+               /* use polling to wait for the event */
+               ten_msec = jiffies + msecs_to_jiffies(TPM_I2C_RETRY_DELAY_LONG);
+               stop = jiffies + timeout;
+               do {
+                       if (time_before(jiffies, ten_msec))
+                               msleep(TPM_I2C_RETRY_DELAY_SHORT);
+                       else
+                               msleep(TPM_I2C_RETRY_DELAY_LONG);
+                       status_valid = i2c_nuvoton_check_status(chip, mask,
+                                                               value);
+                       if (status_valid)
+                               return 0;
+               } while (time_before(jiffies, stop));
+       }
+       dev_err(chip->dev, "%s(%02x, %02x) -> timeout\n", __func__, mask,
+               value);
+       return -ETIMEDOUT;
+}
+
+/* wait for dataAvail field to be set in the TPM_STS register */
+static int i2c_nuvoton_wait_for_data_avail(struct tpm_chip *chip, u32 timeout,
+                                          wait_queue_head_t *queue)
+{
+       return i2c_nuvoton_wait_for_stat(chip,
+                                        TPM_STS_DATA_AVAIL | TPM_STS_VALID,
+                                        TPM_STS_DATA_AVAIL | TPM_STS_VALID,
+                                        timeout, queue);
+}
+
+/* Read @count bytes into @buf from TPM_RD_FIFO register */
+static int i2c_nuvoton_recv_data(struct i2c_client *client,
+                                struct tpm_chip *chip, u8 *buf, size_t count)
+{
+       s32 rc;
+       int burst_count, bytes2read, size = 0;
+
+       while (size < count &&
+              i2c_nuvoton_wait_for_data_avail(chip,
+                                              chip->vendor.timeout_c,
+                                              &chip->vendor.read_queue) == 0) {
+               burst_count = i2c_nuvoton_get_burstcount(client, chip);
+               if (burst_count < 0) {
+                       dev_err(chip->dev,
+                               "%s() fail to read burstCount=%d\n", __func__,
+                               burst_count);
+                       return -EIO;
+               }
+               bytes2read = min_t(size_t, burst_count, count - size);
+               rc = i2c_nuvoton_read_buf(client, TPM_DATA_FIFO_R,
+                                         bytes2read, &buf[size]);
+               if (rc < 0) {
+                       dev_err(chip->dev,
+                               "%s() fail on i2c_nuvoton_read_buf()=%d\n",
+                               __func__, rc);
+                       return -EIO;
+               }
+               dev_dbg(chip->dev, "%s(%d):", __func__, bytes2read);
+               size += bytes2read;
+       }
+
+       return size;
+}
+
+/* Read TPM command results into @buf; returns bytes read or a -errno */
+static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count)
+{
+       struct device *dev = chip->dev;
+       struct i2c_client *client = to_i2c_client(dev);
+       s32 rc;
+       int expected, status, burst_count, retries, size = 0;
+
+       if (count < TPM_HEADER_SIZE) {
+               i2c_nuvoton_ready(chip);    /* return to idle */
+               dev_err(dev, "%s() count < header size\n", __func__);
+               return -EIO;
+       }
+       for (retries = 0; retries < TPM_RETRY; retries++) {
+               if (retries > 0) {
+                       /* if this is not the first trial, set responseRetry */
+                       i2c_nuvoton_write_status(client,
+                                                TPM_STS_RESPONSE_RETRY);
+               }
+               /*
+                * read first available (> 10 bytes), including:
+                * tag, paramsize, and result
+                */
+               status = i2c_nuvoton_wait_for_data_avail(
+                       chip, chip->vendor.timeout_c, &chip->vendor.read_queue);
+               if (status != 0) {
+                       dev_err(dev, "%s() timeout on dataAvail\n", __func__);
+                       size = -ETIMEDOUT;
+                       continue;
+               }
+               burst_count = i2c_nuvoton_get_burstcount(client, chip);
+               if (burst_count < 0) {
+                       dev_err(dev, "%s() fail to get burstCount\n", __func__);
+                       size = -EIO;
+                       continue;
+               }
+               size = i2c_nuvoton_recv_data(client, chip, buf,
+                                            burst_count);
+               if (size < TPM_HEADER_SIZE) {
+                       dev_err(dev, "%s() fail to read header\n", __func__);
+                       size = -EIO;
+                       continue;
+               }
+               /*
+                * expected length is a big endian 32 bit field; it must fit in
+                * buf and cover what was already read (expected - size >= 0)
+                */
+               expected = be32_to_cpu(*(__be32 *) (buf + 2));
+               if (expected > count || expected < size) {
+                       dev_err(dev, "%s() expected > count\n", __func__);
+                       size = -EIO;
+                       continue;
+               }
+               rc = i2c_nuvoton_recv_data(client, chip, &buf[size],
+                                          expected - size);
+               size += rc;
+               if (rc < 0 || size < expected) {
+                       dev_err(dev, "%s() fail to read remainder of result\n",
+                               __func__);
+                       size = -EIO;
+                       continue;
+               }
+               if (i2c_nuvoton_wait_for_stat(
+                           chip, TPM_STS_VALID | TPM_STS_DATA_AVAIL,
+                           TPM_STS_VALID, chip->vendor.timeout_c,
+                           NULL)) {
+                       dev_err(dev, "%s() error left over data\n", __func__);
+                       size = -ETIMEDOUT;
+                       continue;
+               }
+               break;
+       }
+       i2c_nuvoton_ready(chip);
+       dev_dbg(chip->dev, "%s() -> %d\n", __func__, size);
+       return size;
+}
+
+/*
+ * Send TPM command.
+ *
+ * @chip: TPM chip the command is sent to
+ * @buf:  command bytes; the big-endian ordinal is read from offset 6
+ * @len:  number of bytes in @buf
+ *
+ * The transfer is attempted up to TPM_RETRY times. In each attempt all but
+ * the last byte are written in pieces no larger than the reported burst
+ * count, waiting for VALID|EXPECT after every piece; the last byte is written
+ * on its own and EXPECT must then be clear. Afterwards TPM_STS_GO is written
+ * and the function waits for the response data to become available.
+ *
+ * If interrupts are used (signaled by an irq set in the vendor structure)
+ * tpm.c can skip polling for the data to be available as the interrupt is
+ * waited for here
+ *
+ * Returns @len on success or a negative errno on failure.
+ */
+static int i2c_nuvoton_send(struct tpm_chip *chip, u8 *buf, size_t len)
+{
+       struct device *dev = chip->dev;
+       struct i2c_client *client = to_i2c_client(dev);
+       u32 ordinal;
+       size_t count = 0;
+       int burst_count, bytes2write, retries, rc = -EIO;
+
+       for (retries = 0; retries < TPM_RETRY; retries++) {
+               /*
+                * Every attempt starts over from commandReady, so the command
+                * has to be resent from its first byte. Without this reset a
+                * retry would only send the bytes left over from the attempt
+                * that failed.
+                */
+               count = 0;
+               i2c_nuvoton_ready(chip);
+               if (i2c_nuvoton_wait_for_stat(chip, TPM_STS_COMMAND_READY,
+                                             TPM_STS_COMMAND_READY,
+                                             chip->vendor.timeout_b, NULL)) {
+                       dev_err(dev, "%s() timeout on commandReady\n",
+                               __func__);
+                       rc = -EIO;
+                       continue;
+               }
+               rc = 0;
+               /* send everything except the last byte in burst-sized pieces */
+               while (count < len - 1) {
+                       burst_count = i2c_nuvoton_get_burstcount(client,
+                                                                chip);
+                       if (burst_count < 0) {
+                               dev_err(dev, "%s() fail get burstCount\n",
+                                       __func__);
+                               rc = -EIO;
+                               break;
+                       }
+                       bytes2write = min_t(size_t, burst_count,
+                                           len - 1 - count);
+                       rc = i2c_nuvoton_write_buf(client, TPM_DATA_FIFO_W,
+                                                  bytes2write, &buf[count]);
+                       if (rc < 0) {
+                               dev_err(dev, "%s() fail i2cWriteBuf\n",
+                                       __func__);
+                               break;
+                       }
+                       dev_dbg(dev, "%s(%d):", __func__, bytes2write);
+                       count += bytes2write;
+                       /* more data is still to come, so Expect must be set */
+                       rc = i2c_nuvoton_wait_for_stat(chip,
+                                                      TPM_STS_VALID |
+                                                      TPM_STS_EXPECT,
+                                                      TPM_STS_VALID |
+                                                      TPM_STS_EXPECT,
+                                                      chip->vendor.timeout_c,
+                                                      NULL);
+                       if (rc < 0) {
+                               dev_err(dev, "%s() timeout on Expect\n",
+                                       __func__);
+                               rc = -ETIMEDOUT;
+                               break;
+                       }
+               }
+               if (rc < 0)
+                       continue;
+
+               /* write last byte */
+               rc = i2c_nuvoton_write_buf(client, TPM_DATA_FIFO_W, 1,
+                                          &buf[count]);
+               if (rc < 0) {
+                       dev_err(dev, "%s() fail to write last byte\n",
+                               __func__);
+                       rc = -EIO;
+                       continue;
+               }
+               dev_dbg(dev, "%s(last): %02x", __func__, buf[count]);
+               /* the command is complete, so Expect must now be clear */
+               rc = i2c_nuvoton_wait_for_stat(chip,
+                                              TPM_STS_VALID | TPM_STS_EXPECT,
+                                              TPM_STS_VALID,
+                                              chip->vendor.timeout_c, NULL);
+               if (rc) {
+                       dev_err(dev, "%s() timeout on Expect to clear\n",
+                               __func__);
+                       rc = -ETIMEDOUT;
+                       continue;
+               }
+               break;
+       }
+       if (rc < 0) {
+               /* retries == TPM_RETRY */
+               i2c_nuvoton_ready(chip);
+               return rc;
+       }
+       /* execute the TPM command */
+       rc = i2c_nuvoton_write_status(client, TPM_STS_GO);
+       if (rc < 0) {
+               dev_err(dev, "%s() fail to write Go\n", __func__);
+               i2c_nuvoton_ready(chip);
+               return rc;
+       }
+       /* the wait time is derived from the command ordinal */
+       ordinal = be32_to_cpu(*((__be32 *) (buf + 6)));
+       rc = i2c_nuvoton_wait_for_data_avail(chip,
+                                            tpm_calc_ordinal_duration(chip,
+                                                                      ordinal),
+                                            &chip->vendor.read_queue);
+       if (rc) {
+               dev_err(dev, "%s() timeout command duration\n", __func__);
+               i2c_nuvoton_ready(chip);
+               return rc;
+       }
+
+       dev_dbg(dev, "%s() -> %zd\n", __func__, len);
+       return len;
+}
+
+/*
+ * Report whether @status shows a canceled request: true only when the status
+ * byte is exactly TPM_STS_COMMAND_READY. @chip is not used.
+ */
+static bool i2c_nuvoton_req_canceled(struct tpm_chip *chip, u8 status)
+{
+       if (status != TPM_STS_COMMAND_READY)
+               return false;
+       return true;
+}
+
+/*
+ * File operations installed as tpm_i2c.miscdev.fops. Every handler is a tpm_*
+ * function rather than code from this driver (presumably the common TPM core
+ * implementations - confirm).
+ */
+static const struct file_operations i2c_nuvoton_ops = {
+       .owner = THIS_MODULE,
+       .llseek = no_llseek,
+       .open = tpm_open,
+       .read = tpm_read,
+       .write = tpm_write,
+       .release = tpm_release,
+};
+
+/*
+ * Device attributes of the chip. All are read-only (S_IRUGO) except "cancel",
+ * which is write-only for owner and group. The show/store callbacks are tpm_*
+ * functions, not code from this driver.
+ */
+static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
+static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
+static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
+static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
+static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
+static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
+static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
+static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
+static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
+
+/* NULL-terminated list of the attributes declared above. */
+static struct attribute *i2c_nuvoton_attrs[] = {
+       &dev_attr_pubek.attr,
+       &dev_attr_pcrs.attr,
+       &dev_attr_enabled.attr,
+       &dev_attr_active.attr,
+       &dev_attr_owned.attr,
+       &dev_attr_temp_deactivated.attr,
+       &dev_attr_caps.attr,
+       &dev_attr_cancel.attr,
+       &dev_attr_durations.attr,
+       &dev_attr_timeouts.attr,
+       NULL,
+};
+
+/* Attribute group referenced by tpm_i2c.attr_group. */
+static struct attribute_group i2c_nuvoton_attr_grp = {
+       .attrs = i2c_nuvoton_attrs
+};
+
+/*
+ * Vendor hooks handed to tpm_register_hardware() by the probe function.
+ * NOTE(review): req_complete_mask/req_complete_val presumably mean "the
+ * request is complete once (status & mask) == val", i.e. when both DATA_AVAIL
+ * and VALID are set - confirm against the code that reads these fields.
+ */
+static const struct tpm_vendor_specific tpm_i2c = {
+       .status = i2c_nuvoton_read_status,
+       .recv = i2c_nuvoton_recv,
+       .send = i2c_nuvoton_send,
+       .cancel = i2c_nuvoton_ready,
+       .req_complete_mask = TPM_STS_DATA_AVAIL | TPM_STS_VALID,
+       .req_complete_val = TPM_STS_DATA_AVAIL | TPM_STS_VALID,
+       .req_canceled = i2c_nuvoton_req_canceled,
+       .attr_group = &i2c_nuvoton_attr_grp,
+       .miscdev.fops = &i2c_nuvoton_ops,
+};
+
+/*
+ * Interrupt handler.
+ *
+ * The only purpose of the handler is to signal to any waiting threads that
+ * the interrupt is currently being asserted. The driver does not do any
+ * processing triggered by interrupts, and the chip provides no way to mask at
+ * the source (plus that would be slow over I2C). The IRQ is therefore run as
+ * a one-shot, which means it cannot be shared.
+ *
+ * @dummy:  IRQ number, unused (chip->vendor.irq is used instead)
+ * @dev_id: the struct tpm_chip passed when the IRQ was requested
+ */
+static irqreturn_t i2c_nuvoton_int_handler(int dummy, void *dev_id)
+{
+       struct tpm_chip *chip = dev_id;
+       struct priv_data *priv = chip->vendor.priv;
+
+       /* count the interrupt, then wake whoever sleeps on read_queue */
+       priv->intrs++;
+       wake_up(&chip->vendor.read_queue);
+       /*
+        * one-shot: the line stays disabled from here on
+        * (presumably re-enabled by the waiting code - confirm)
+        */
+       disable_irq_nosync(chip->vendor.irq);
+       return IRQ_HANDLED;
+}
+
+/*
+ * Read the 4-byte VID/DID/RID value of the chip into @res.
+ *
+ * The first three bytes must match the expected WPCT301 values; the fourth
+ * byte (RID) is ignored. The value is read from TPM_VID_DID_RID first and,
+ * if that does not match, once more from TPM_DATA_FIFO_W.
+ *
+ * Returns 0 on success, -ENODEV if the adapter lacks SMBus byte-data support
+ * or neither read matches, or the negative error of a failed read.
+ */
+static int get_vid(struct i2c_client *client, u32 *res)
+{
+       static const u8 vid_did_rid_value[] = { 0x50, 0x10, 0xfe };
+       u32 id;
+       s32 err;
+
+       if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+               return -ENODEV;
+
+       err = i2c_nuvoton_read_buf(client, TPM_VID_DID_RID, 4, (u8 *)&id);
+       if (err < 0)
+               return err;
+
+       /* check WPCT301 values - ignore RID */
+       if (memcmp(&id, vid_did_rid_value, sizeof(vid_did_rid_value)) == 0)
+               goto matched;
+
+       /*
+        * f/w rev 2.81 has an issue where the VID_DID_RID is not
+        * reporting the right value. so give it another chance at
+        * offset 0x20 (FIFO_W).
+        */
+       err = i2c_nuvoton_read_buf(client, TPM_DATA_FIFO_W, 4, (u8 *)&id);
+       if (err < 0)
+               return err;
+
+       /* check WPCT301 values - ignore RID */
+       if (memcmp(&id, vid_did_rid_value, sizeof(vid_did_rid_value)) != 0)
+               return -ENODEV;
+
+matched:
+       *res = id;
+       return 0;
+}
+
+/*
+ * Probe an I2C client: verify the chip ID, register the TPM, set default
+ * timeouts, optionally hook up the interrupt, then fetch the real timeouts
+ * and run the self test.
+ *
+ * @client: I2C client being probed
+ * @id:     matching entry of the I2C id table (unused)
+ *
+ * Returns 0 on success or a negative errno. Failures after registration undo
+ * it through the out_err path.
+ */
+static int i2c_nuvoton_probe(struct i2c_client *client,
+                            const struct i2c_device_id *id)
+{
+       int rc;
+       struct tpm_chip *chip;
+       struct device *dev = &client->dev;
+       u32 vid = 0;
+
+       rc = get_vid(client, &vid);
+       if (rc)
+               return rc;
+
+       dev_info(dev, "VID: %04X DID: %02X RID: %02X\n", (u16) vid,
+                (u8) (vid >> 16), (u8) (vid >> 24));
+
+       chip = tpm_register_hardware(dev, &tpm_i2c);
+       if (!chip) {
+               dev_err(dev, "%s() error in tpm_register_hardware\n", __func__);
+               return -ENODEV;
+       }
+
+       chip->vendor.priv = devm_kzalloc(dev, sizeof(struct priv_data),
+                                        GFP_KERNEL);
+       if (!chip->vendor.priv) {
+               /* the interrupt handler dereferences priv, so give up here */
+               rc = -ENOMEM;
+               goto out_err;
+       }
+       init_waitqueue_head(&chip->vendor.read_queue);
+       init_waitqueue_head(&chip->vendor.int_queue);
+
+       /* Default timeouts */
+       chip->vendor.timeout_a = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+       chip->vendor.timeout_b = msecs_to_jiffies(TPM_I2C_LONG_TIMEOUT);
+       chip->vendor.timeout_c = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+       chip->vendor.timeout_d = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT);
+
+       /*
+        * I2C intfcaps (interrupt capabilities) in the chip are hard coded to:
+        *   TPM_INTF_INT_LEVEL_LOW | TPM_INTF_DATA_AVAIL_INT
+        * The IRQ should be set in the i2c_board_info (which is done
+        * automatically in of_i2c_register_devices, for device tree users).
+        */
+       chip->vendor.irq = client->irq;
+
+       if (chip->vendor.irq) {
+               dev_dbg(dev, "%s() chip-vendor.irq\n", __func__);
+               rc = devm_request_irq(dev, chip->vendor.irq,
+                                     i2c_nuvoton_int_handler,
+                                     IRQF_TRIGGER_LOW,
+                                     chip->vendor.miscdev.name,
+                                     chip);
+               if (rc) {
+                       /* not fatal: carry on without an interrupt */
+                       dev_err(dev, "%s() Unable to request irq: %d for use\n",
+                               __func__, chip->vendor.irq);
+                       chip->vendor.irq = 0;
+               } else {
+                       /* Clear any pending interrupt */
+                       i2c_nuvoton_ready(chip);
+                       /* - wait for TPM_STS==0xA0 (stsValid, commandReady) */
+                       rc = i2c_nuvoton_wait_for_stat(chip,
+                                                      TPM_STS_COMMAND_READY,
+                                                      TPM_STS_COMMAND_READY,
+                                                      chip->vendor.timeout_b,
+                                                      NULL);
+                       if (rc == 0) {
+                               /*
+                                * TIS is in ready state
+                                * write dummy byte to enter reception state
+                                * TPM_DATA_FIFO_W <- rc (0)
+                                */
+                               rc = i2c_nuvoton_write_buf(client,
+                                                          TPM_DATA_FIFO_W,
+                                                          1, (u8 *) (&rc));
+                               if (rc < 0)
+                                       goto out_err;
+                               /* TPM_STS <- 0x40 (commandReady) */
+                               i2c_nuvoton_ready(chip);
+                       } else {
+                               /*
+                                * timeout_b reached - command was
+                                * aborted. TIS should now be in idle state -
+                                * only TPM_STS_VALID should be set
+                                */
+                               if (i2c_nuvoton_read_status(chip) !=
+                                   TPM_STS_VALID) {
+                                       rc = -EIO;
+                                       goto out_err;
+                               }
+                       }
+               }
+       }
+
+       if (tpm_get_timeouts(chip)) {
+               rc = -ENODEV;
+               goto out_err;
+       }
+
+       if (tpm_do_selftest(chip)) {
+               rc = -ENODEV;
+               goto out_err;
+       }
+
+       return 0;
+
+out_err:
+       tpm_dev_vendor_release(chip);
+       tpm_remove_hardware(chip->dev);
+       return rc;
+}
+
+/*
+ * Remove callback: undo what the probe function registered for @client.
+ * Always returns 0.
+ */
+static int i2c_nuvoton_remove(struct i2c_client *client)
+{
+       struct device *dev = &(client->dev);
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+
+       if (chip)
+               tpm_dev_vendor_release(chip);
+       tpm_remove_hardware(dev);
+       /*
+        * NOTE(review): the probe error path performs the same two calls but
+        * does not kfree() the chip. Confirm who owns the chip allocation; if
+        * the TPM core frees it when the device is released, this kfree() is
+        * a double free.
+        */
+       kfree(chip);
+       return 0;
+}
+
+
+/* I2C device-ID table: a single entry named I2C_DRIVER_NAME. */
+static const struct i2c_device_id i2c_nuvoton_id[] = {
+       {I2C_DRIVER_NAME, 0},
+       {}
+};
+MODULE_DEVICE_TABLE(i2c, i2c_nuvoton_id);
+
+#ifdef CONFIG_OF
+/* Device-tree "compatible" strings matched by this driver. */
+static const struct of_device_id i2c_nuvoton_of_match[] = {
+       {.compatible = "nuvoton,npct501"},
+       {.compatible = "winbond,wpct301"},
+       {},
+};
+MODULE_DEVICE_TABLE(of, i2c_nuvoton_of_match);
+#endif
+
+/* Suspend and resume are delegated to tpm_pm_suspend()/tpm_pm_resume(). */
+static SIMPLE_DEV_PM_OPS(i2c_nuvoton_pm_ops, tpm_pm_suspend, tpm_pm_resume);
+
+/* I2C driver description: probe/remove callbacks, PM ops and match tables. */
+static struct i2c_driver i2c_nuvoton_driver = {
+       .id_table = i2c_nuvoton_id,
+       .probe = i2c_nuvoton_probe,
+       .remove = i2c_nuvoton_remove,
+       .driver = {
+               .name = I2C_DRIVER_NAME,
+               .owner = THIS_MODULE,
+               .pm = &i2c_nuvoton_pm_ops,
+               .of_match_table = of_match_ptr(i2c_nuvoton_of_match),
+       },
+};
+
+module_i2c_driver(i2c_nuvoton_driver);
+
+MODULE_AUTHOR("Dan Morav (dan.morav@nuvoton.com)");
+MODULE_DESCRIPTION("Nuvoton TPM I2C Driver");
+MODULE_LICENSE("GPL");
index 5bb8e2ddd3b3b9b3a51557a605e8675a8f5c3e64..a0d6ceb5d00551b2c10f4341d1b5b5bd2d812e0b 100644 (file)
@@ -584,7 +584,7 @@ static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
 static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
 static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
 static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL);
-static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
 static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
 
 static struct attribute *stm_tpm_attrs[] = {
@@ -746,8 +746,6 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id)
 
        tpm_get_timeouts(chip);
 
-       i2c_set_clientdata(client, chip);
-
        dev_info(chip->dev, "TPM I2C Initialized\n");
        return 0;
 _irq_set:
@@ -807,24 +805,18 @@ static int tpm_st33_i2c_remove(struct i2c_client *client)
 #ifdef CONFIG_PM_SLEEP
 /*
  * tpm_st33_i2c_pm_suspend suspend the TPM device
- * Added: Work around when suspend and no tpm application is running, suspend
- * may fail because chip->data_buffer is not set (only set in tpm_open in Linux
- * TPM core)
  * @param: client, the i2c_client drescription (TPM I2C description).
  * @param: mesg, the power management message.
  * @return: 0 in case of success.
  */
 static int tpm_st33_i2c_pm_suspend(struct device *dev)
 {
-       struct tpm_chip *chip = dev_get_drvdata(dev);
        struct st33zp24_platform_data *pin_infos = dev->platform_data;
        int ret = 0;
 
        if (power_mgt) {
                gpio_set_value(pin_infos->io_lpcpd, 0);
        } else {
-               if (chip->data_buffer == NULL)
-                       chip->data_buffer = pin_infos->tpm_i2c_buffer[0];
                ret = tpm_pm_suspend(dev);
        }
        return ret;
@@ -849,8 +841,6 @@ static int tpm_st33_i2c_pm_resume(struct device *dev)
                                          TPM_STS_VALID) == TPM_STS_VALID,
                                          chip->vendor.timeout_b);
        } else {
-               if (chip->data_buffer == NULL)
-                       chip->data_buffer = pin_infos->tpm_i2c_buffer[0];
                ret = tpm_pm_resume(dev);
                if (!ret)
                        tpm_do_selftest(chip);
index 56b07c35a13e173bf1cc5d10321d30d5a11f259e..2783a42aa73295406bee2ff845f5ae08a2d9c738 100644 (file)
@@ -98,7 +98,7 @@ static int tpm_ibmvtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
 
        if (count < len) {
                dev_err(ibmvtpm->dev,
-                       "Invalid size in recv: count=%ld, crq_size=%d\n",
+                       "Invalid size in recv: count=%zd, crq_size=%d\n",
                        count, len);
                return -EIO;
        }
@@ -136,7 +136,7 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
 
        if (count > ibmvtpm->rtce_size) {
                dev_err(ibmvtpm->dev,
-                       "Invalid size in send: count=%ld, rtce_size=%d\n",
+                       "Invalid size in send: count=%zd, rtce_size=%d\n",
                        count, ibmvtpm->rtce_size);
                return -EIO;
        }
@@ -419,7 +419,7 @@ static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
 static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
 static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
                   NULL);
-static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
 static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
 static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
 static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
index 2168d15bc728e22c99a61976de5050c26dc33cbe..8e562dc656016cd9c4dbeaaa3dac0a96a02d0f26 100644 (file)
@@ -452,12 +452,8 @@ int tpm_add_ppi(struct kobject *parent)
 {
        return sysfs_create_group(parent, &ppi_attr_grp);
 }
-EXPORT_SYMBOL_GPL(tpm_add_ppi);
 
 void tpm_remove_ppi(struct kobject *parent)
 {
        sysfs_remove_group(parent, &ppi_attr_grp);
 }
-EXPORT_SYMBOL_GPL(tpm_remove_ppi);
-
-MODULE_LICENSE("GPL");
index 5796d0157ce0c3bbd82c662bf61f8035e9d9daf8..1b74459c072399109d1730a6d60471e64ffcc48b 100644 (file)
@@ -448,7 +448,7 @@ static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
 static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
 static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
                   NULL);
-static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
 static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
 static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
 static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
index 94c280d36e8b3bfaea00ee36c4fe3215818b3c86..c8ff4df81779f3b66a3e62abab055b4e404b81e7 100644 (file)
@@ -351,8 +351,6 @@ static int tpmfront_probe(struct xenbus_device *dev,
 
        tpm_get_timeouts(priv->chip);
 
-       dev_set_drvdata(&dev->dev, priv->chip);
-
        return rv;
 }
 
index f5b0224c99679ed23b8475a75b78992d3ca8c899..b34fda4dcabf091cc0f3bdc833996b94ba8bd86f 100644 (file)
@@ -22,7 +22,8 @@ enum pkey_algo {
        PKEY_ALGO__LAST
 };
 
-extern const char *const pkey_algo[PKEY_ALGO__LAST];
+extern const char *const pkey_algo_name[PKEY_ALGO__LAST];
+extern const struct public_key_algorithm *pkey_algo[PKEY_ALGO__LAST];
 
 enum pkey_hash_algo {
        PKEY_HASH_MD4,
@@ -36,7 +37,7 @@ enum pkey_hash_algo {
        PKEY_HASH__LAST
 };
 
-extern const char *const pkey_hash_algo[PKEY_HASH__LAST];
+extern const char *const pkey_hash_algo_name[PKEY_HASH__LAST];
 
 enum pkey_id_type {
        PKEY_ID_PGP,            /* OpenPGP generated key ID */
@@ -44,7 +45,7 @@ enum pkey_id_type {
        PKEY_ID_TYPE__LAST
 };
 
-extern const char *const pkey_id_type[PKEY_ID_TYPE__LAST];
+extern const char *const pkey_id_type_name[PKEY_ID_TYPE__LAST];
 
 /*
  * Cryptographic data for the public-key subtype of the asymmetric key type.
@@ -59,6 +60,7 @@ struct public_key {
 #define PKEY_CAN_DECRYPT       0x02
 #define PKEY_CAN_SIGN          0x04
 #define PKEY_CAN_VERIFY                0x08
+       enum pkey_algo pkey_algo : 8;
        enum pkey_id_type id_type : 8;
        union {
                MPI     mpi[5];
@@ -88,6 +90,7 @@ struct public_key_signature {
        u8 *digest;
        u8 digest_size;                 /* Number of bytes in digest */
        u8 nr_mpi;                      /* Occupancy of mpi[] */
+       enum pkey_algo pkey_algo : 8;
        enum pkey_hash_algo pkey_hash_algo : 8;
        union {
                MPI mpi[2];
diff --git a/include/keys/big_key-type.h b/include/keys/big_key-type.h
new file mode 100644 (file)
index 0000000..d69bc8a
--- /dev/null
@@ -0,0 +1,25 @@
+/* Big capacity key type.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _KEYS_BIG_KEY_TYPE_H
+#define _KEYS_BIG_KEY_TYPE_H
+
+#include <linux/key-type.h>
+
+/* The big_key key type object; defined outside this header. */
+extern struct key_type key_type_big_key;
+
+/*
+ * Functions operating on big_key keys, declared here so that other code can
+ * call them.
+ * NOTE(review): presumably these are the handlers installed in
+ * key_type_big_key - confirm against the implementation.
+ */
+extern int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep);
+extern void big_key_revoke(struct key *key);
+extern void big_key_destroy(struct key *key);
+extern void big_key_describe(const struct key *big_key, struct seq_file *m);
+extern long big_key_read(const struct key *key, char __user *buffer, size_t buflen);
+
+#endif /* _KEYS_BIG_KEY_TYPE_H */
index cf49159b0e3a4f47c7890a122ae44ea0dc35a5da..fca5c62340a47fbbb25386002d7c8293fab7a40b 100644 (file)
@@ -1,6 +1,6 @@
 /* Keyring key type
  *
- * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2008, 2013 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
 #define _KEYS_KEYRING_TYPE_H
 
 #include <linux/key.h>
-#include <linux/rcupdate.h>
-
-/*
- * the keyring payload contains a list of the keys to which the keyring is
- * subscribed
- */
-struct keyring_list {
-       struct rcu_head rcu;            /* RCU deletion hook */
-       unsigned short  maxkeys;        /* max keys this list can hold */
-       unsigned short  nkeys;          /* number of keys currently held */
-       unsigned short  delkey;         /* key to be unlinked by RCU */
-       struct key __rcu *keys[0];
-};
-
+#include <linux/assoc_array.h>
 
 #endif /* _KEYS_KEYRING_TYPE_H */
diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h
new file mode 100644 (file)
index 0000000..8dabc39
--- /dev/null
@@ -0,0 +1,23 @@
+/* System keyring containing trusted public keys.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _KEYS_SYSTEM_KEYRING_H
+#define _KEYS_SYSTEM_KEYRING_H
+
+/* Nothing is declared unless CONFIG_SYSTEM_TRUSTED_KEYRING is enabled. */
+#ifdef CONFIG_SYSTEM_TRUSTED_KEYRING
+
+#include <linux/key.h>
+
+/* The system keyring of trusted public keys; defined outside this header. */
+extern struct key *system_trusted_keyring;
+
+#endif
+
+#endif /* _KEYS_SYSTEM_KEYRING_H */
diff --git a/include/linux/assoc_array.h b/include/linux/assoc_array.h
new file mode 100644 (file)
index 0000000..9a193b8
--- /dev/null
@@ -0,0 +1,92 @@
+/* Generic associative array implementation.
+ *
+ * See Documentation/assoc_array.txt for information.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_ASSOC_ARRAY_H
+#define _LINUX_ASSOC_ARRAY_H
+
+#ifdef CONFIG_ASSOCIATIVE_ARRAY
+
+#include <linux/types.h>
+
+#define ASSOC_ARRAY_KEY_CHUNK_SIZE BITS_PER_LONG /* Key data retrieved in chunks of this size */
+
+/*
+ * Generic associative array.
+ *
+ * assoc_array_init() puts it into the empty state: NULL root, zero leaves.
+ */
+struct assoc_array {
+       struct assoc_array_ptr  *root;          /* The node at the root of the tree */
+       /* presumably the number of leaves in the whole tree - confirm */
+       unsigned long           nr_leaves_on_tree;
+};
+
+/*
+ * Operations on objects and index keys for use by array manipulation routines.
+ *
+ * The callbacks are supplied by the user of the array. Key data is handed
+ * over as unsigned long chunks (see ASSOC_ARRAY_KEY_CHUNK_SIZE).
+ * NOTE(review): the exact meaning of the "level" argument is not visible
+ * here - see Documentation/assoc_array.txt.
+ */
+struct assoc_array_ops {
+       /* Method to get a chunk of an index key from caller-supplied data */
+       unsigned long (*get_key_chunk)(const void *index_key, int level);
+
+       /* Method to get a piece of an object's index key */
+       unsigned long (*get_object_key_chunk)(const void *object, int level);
+
+       /* Is this the object we're looking for? */
+       bool (*compare_object)(const void *object, const void *index_key);
+
+       /* How different are two objects, to a bit position in their keys? (or
+        * -1 if they're the same)
+        */
+       int (*diff_objects)(const void *a, const void *b);
+
+       /* Method to free an object. */
+       void (*free_object)(void *object);
+};
+
+/*
+ * Access and manipulation functions.
+ */
+struct assoc_array_edit;
+
+/* Put @array into the empty state: no leaves counted and no root node. */
+static inline void assoc_array_init(struct assoc_array *array)
+{
+       array->nr_leaves_on_tree = 0;
+       array->root = NULL;
+}
+
+/*
+ * Read-side operations: walk all objects with a callback, or look one up by
+ * its index key.
+ */
+extern int assoc_array_iterate(const struct assoc_array *array,
+                              int (*iterator)(const void *object,
+                                              void *iterator_data),
+                              void *iterator_data);
+extern void *assoc_array_find(const struct assoc_array *array,
+                             const struct assoc_array_ops *ops,
+                             const void *index_key);
+extern void assoc_array_destroy(struct assoc_array *array,
+                               const struct assoc_array_ops *ops);
+/*
+ * Modifying operations. Insert, delete and clear return a struct
+ * assoc_array_edit rather than a status.
+ * NOTE(review): presumably the returned edit is then committed with
+ * assoc_array_apply_edit() or dropped with assoc_array_cancel_edit() - see
+ * Documentation/assoc_array.txt.
+ */
+extern struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
+                                                  const struct assoc_array_ops *ops,
+                                                  const void *index_key,
+                                                  void *object);
+extern void assoc_array_insert_set_object(struct assoc_array_edit *edit,
+                                         void *object);
+extern struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
+                                                  const struct assoc_array_ops *ops,
+                                                  const void *index_key);
+extern struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
+                                                 const struct assoc_array_ops *ops);
+extern void assoc_array_apply_edit(struct assoc_array_edit *edit);
+extern void assoc_array_cancel_edit(struct assoc_array_edit *edit);
+/* Garbage collection driven by a per-object callback. */
+extern int assoc_array_gc(struct assoc_array *array,
+                         const struct assoc_array_ops *ops,
+                         bool (*iterator)(void *object, void *iterator_data),
+                         void *iterator_data);
+
+#endif /* CONFIG_ASSOCIATIVE_ARRAY */
+#endif /* _LINUX_ASSOC_ARRAY_H */
diff --git a/include/linux/assoc_array_priv.h b/include/linux/assoc_array_priv.h
new file mode 100644 (file)
index 0000000..711275e
--- /dev/null
@@ -0,0 +1,182 @@
+/* Private definitions for the generic associative array implementation.
+ *
+ * See Documentation/assoc_array.txt for information.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_ASSOC_ARRAY_PRIV_H
+#define _LINUX_ASSOC_ARRAY_PRIV_H
+
+#ifdef CONFIG_ASSOCIATIVE_ARRAY
+
+#include <linux/assoc_array.h>
+
+/*
+ * Tree geometry. With a fan-out of 16, ASSOC_ARRAY_LEVEL_STEP is
+ * ilog2(16) = 4, the per-level segment size described in the node comment.
+ */
+#define ASSOC_ARRAY_FAN_OUT            16      /* Number of slots per node */
+#define ASSOC_ARRAY_FAN_MASK           (ASSOC_ARRAY_FAN_OUT - 1)
+#define ASSOC_ARRAY_LEVEL_STEP         (ilog2(ASSOC_ARRAY_FAN_OUT))
+#define ASSOC_ARRAY_LEVEL_STEP_MASK    (ASSOC_ARRAY_LEVEL_STEP - 1)
+#define ASSOC_ARRAY_KEY_CHUNK_MASK     (ASSOC_ARRAY_KEY_CHUNK_SIZE - 1)
+#define ASSOC_ARRAY_KEY_CHUNK_SHIFT    (ilog2(BITS_PER_LONG))
+
+/*
+ * Undefined type representing a pointer with type information in the bottom
+ * two bits.
+ */
+struct assoc_array_ptr;
+
+/*
+ * An N-way node in the tree.
+ *
+ * Each slot contains one of four things:
+ *
+ *     (1) Nothing (NULL).
+ *
+ *     (2) A leaf object (pointer type 0).
+ *
+ *     (3) A next-level node (pointer type 1, subtype 0).
+ *
+ *     (4) A shortcut (pointer type 1, subtype 1).
+ *
+ * The tree is optimised for search-by-ID, but permits reasonable iteration
+ * also.
+ *
+ * The tree is navigated by constructing an index key consisting of an array of
+ * segments, where each segment is ilog2(ASSOC_ARRAY_FAN_OUT) bits in size.
+ *
+ * The segments correspond to levels of the tree (the first segment is used at
+ * level 0, the second at level 1, etc.).
+ */
+struct assoc_array_node {
+       /* presumably the parent and this node's slot in it - TODO confirm */
+       struct assoc_array_ptr  *back_pointer;
+       u8                      parent_slot;
+       /* one tagged pointer per slot, see the list above */
+       struct assoc_array_ptr  *slots[ASSOC_ARRAY_FAN_OUT];
+       unsigned long           nr_leaves_on_branch;
+};
+
+/*
+ * A shortcut through the index space out to where a collection of nodes/leaves
+ * with the same IDs live.
+ *
+ * NOTE(review): skip_to_level and index_key[] presumably record the level the
+ * shortcut jumps to and the key bits it covers - confirm against the
+ * implementation.
+ */
+struct assoc_array_shortcut {
+       struct assoc_array_ptr  *back_pointer;
+       int                     parent_slot;
+       int                     skip_to_level;
+       struct assoc_array_ptr  *next_node;
+       /* flexible array member: storage must be allocated with the struct */
+       unsigned long           index_key[];
+};
+
+/*
+ * Preallocation cache.
+ *
+ * This is the object returned by the insert, delete and clear functions
+ * declared in <linux/assoc_array.h>.
+ * NOTE(review): the fields look like a list of pointer stores and counter
+ * adjustments to be carried out when the edit is applied, plus the items to
+ * be disposed of afterwards - confirm against the implementation.
+ */
+struct assoc_array_edit {
+       struct rcu_head                 rcu;
+       struct assoc_array              *array;
+       const struct assoc_array_ops    *ops;
+       const struct assoc_array_ops    *ops_for_excised_subtree;
+       struct assoc_array_ptr          *leaf;
+       struct assoc_array_ptr          **leaf_p;
+       struct assoc_array_ptr          *dead_leaf;
+       struct assoc_array_ptr          *new_meta[3];
+       struct assoc_array_ptr          *excised_meta[1];
+       struct assoc_array_ptr          *excised_subtree;
+       struct assoc_array_ptr          **set_backpointers[ASSOC_ARRAY_FAN_OUT];
+       struct assoc_array_ptr          *set_backpointers_to;
+       struct assoc_array_node         *adjust_count_on;
+       long                            adjust_count_by;
+       /* (location, new value) pairs */
+       struct {
+               struct assoc_array_ptr  **ptr;
+               struct assoc_array_ptr  *to;
+       } set[2];
+       struct {
+               u8                      *p;
+               u8                      to;
+       } set_parent_slot[1];
+       u8                              segment_cache[ASSOC_ARRAY_FAN_OUT + 1];
+};
+
+/*
+ * Internal tree member pointers are marked in the bottom one or two bits to
+ * indicate what type they are so that we don't have to look behind every
+ * pointer to see what it points to.
+ *
+ * Bit 0 is the type (0 = leaf, 1 = metadata). For metadata pointers, bit 1
+ * is the subtype (0 = node, 1 = shortcut).
+ *
+ * We provide functions to test type annotations and to create and translate
+ * the annotated pointers.
+ */
+#define ASSOC_ARRAY_PTR_TYPE_MASK 0x1UL
+#define ASSOC_ARRAY_PTR_LEAF_TYPE 0x0UL        /* Points to leaf (or nowhere) */
+#define ASSOC_ARRAY_PTR_META_TYPE 0x1UL        /* Points to node or shortcut */
+#define ASSOC_ARRAY_PTR_SUBTYPE_MASK   0x2UL
+#define ASSOC_ARRAY_PTR_NODE_SUBTYPE   0x0UL
+#define ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE 0x2UL
+
+/* True if the type bit of @x is set, i.e. @x refers to a node or shortcut. */
+static inline bool assoc_array_ptr_is_meta(const struct assoc_array_ptr *x)
+{
+       unsigned long type_bit = (unsigned long)x & ASSOC_ARRAY_PTR_TYPE_MASK;
+
+       return type_bit != 0;
+}
+/* True if the type bit of @x is clear, i.e. @x is a leaf pointer (or NULL). */
+static inline bool assoc_array_ptr_is_leaf(const struct assoc_array_ptr *x)
+{
+       if (assoc_array_ptr_is_meta(x))
+               return false;
+       return true;
+}
+/*
+ * True if the subtype bit of @x is set. Only the subtype bit is examined, so
+ * this is meaningful for metadata pointers only.
+ */
+static inline bool assoc_array_ptr_is_shortcut(const struct assoc_array_ptr *x)
+{
+       unsigned long subtype_bit =
+               (unsigned long)x & ASSOC_ARRAY_PTR_SUBTYPE_MASK;
+
+       return subtype_bit != 0;
+}
+/* True if the subtype bit of @x is clear (the opposite of is_shortcut). */
+static inline bool assoc_array_ptr_is_node(const struct assoc_array_ptr *x)
+{
+       if (assoc_array_ptr_is_shortcut(x))
+               return false;
+       return true;
+}
+
+/* Strip the type bit from @x and return the result as an object pointer. */
+static inline void *assoc_array_ptr_to_leaf(const struct assoc_array_ptr *x)
+{
+       unsigned long addr = (unsigned long)x;
+
+       addr &= ~ASSOC_ARRAY_PTR_TYPE_MASK;
+       return (void *)addr;
+}
+
+/* Return the address held in @x with both the type and subtype bits cleared. */
+static inline
+unsigned long __assoc_array_ptr_to_meta(const struct assoc_array_ptr *x)
+{
+       unsigned long addr = (unsigned long)x;
+
+       addr &= ~(ASSOC_ARRAY_PTR_SUBTYPE_MASK | ASSOC_ARRAY_PTR_TYPE_MASK);
+       return addr;
+}
+/* Convert the tagged pointer @x to a node pointer by removing its tag bits. */
+static inline
+struct assoc_array_node *assoc_array_ptr_to_node(const struct assoc_array_ptr *x)
+{
+       unsigned long addr = __assoc_array_ptr_to_meta(x);
+
+       return (struct assoc_array_node *)addr;
+}
+/* Convert the tagged pointer @x to a shortcut pointer by removing its tag bits. */
+static inline
+struct assoc_array_shortcut *assoc_array_ptr_to_shortcut(const struct assoc_array_ptr *x)
+{
+       unsigned long addr = __assoc_array_ptr_to_meta(x);
+
+       return (struct assoc_array_shortcut *)addr;
+}
+
+/* Build a tagged pointer by OR-ing the tag bits @t into the address @p. */
+static inline
+struct assoc_array_ptr *__assoc_array_x_to_ptr(const void *p, unsigned long t)
+{
+       unsigned long tagged = (unsigned long)p;
+
+       tagged |= t;
+       return (struct assoc_array_ptr *)tagged;
+}
+/* Tag the object pointer @p as a leaf (type value 0). */
+static inline
+struct assoc_array_ptr *assoc_array_leaf_to_ptr(const void *p)
+{
+       const unsigned long tag = ASSOC_ARRAY_PTR_LEAF_TYPE;
+
+       return __assoc_array_x_to_ptr(p, tag);
+}
+/* Tag the node pointer @p as metadata with the node subtype. */
+static inline
+struct assoc_array_ptr *assoc_array_node_to_ptr(const struct assoc_array_node *p)
+{
+       const unsigned long tag =
+               ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_NODE_SUBTYPE;
+
+       return __assoc_array_x_to_ptr(p, tag);
+}
+/* Tag the shortcut pointer @p as metadata with the shortcut subtype. */
+static inline
+struct assoc_array_ptr *assoc_array_shortcut_to_ptr(const struct assoc_array_shortcut *p)
+{
+       const unsigned long tag =
+               ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE;
+
+       return __assoc_array_x_to_ptr(p, tag);
+}
+
+#endif /* CONFIG_ASSOCIATIVE_ARRAY */
+#endif /* _LINUX_ASSOC_ARRAY_PRIV_H */
index 518a53afb9ea24b301d87d4af6c824bca5fc5548..a74c3a84dfdd05223cc0613c9445bdc396e1afe9 100644 (file)
@@ -45,6 +45,7 @@ struct key_preparsed_payload {
        const void      *data;          /* Raw data */
        size_t          datalen;        /* Raw datalen */
        size_t          quotalen;       /* Quota length for proposed payload */
+       bool            trusted;        /* True if key is trusted */
 };
 
 typedef int (*request_key_actor_t)(struct key_construction *key,
@@ -63,6 +64,11 @@ struct key_type {
         */
        size_t def_datalen;
 
+       /* Default key search algorithm. */
+       unsigned def_lookup_type;
+#define KEYRING_SEARCH_LOOKUP_DIRECT   0x0000  /* Direct lookup by description. */
+#define KEYRING_SEARCH_LOOKUP_ITERATE  0x0001  /* Iterative search. */
+
        /* vet a description */
        int (*vet_description)(const char *description);
 
index 4dfde1161c5e7878565d05ad7f5293e9e4d19cef..80d677483e31f95ce4b08ef00ffb79c239932c18 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/sysctl.h>
 #include <linux/rwsem.h>
 #include <linux/atomic.h>
+#include <linux/assoc_array.h>
 
 #ifdef __KERNEL__
 #include <linux/uidgid.h>
@@ -82,6 +83,12 @@ struct key_owner;
 struct keyring_list;
 struct keyring_name;
 
+struct keyring_index_key {
+       struct key_type         *type;
+       const char              *description;
+       size_t                  desc_len;
+};
+
 /*****************************************************************************/
 /*
  * key reference with possession attribute handling
@@ -99,7 +106,7 @@ struct keyring_name;
 typedef struct __key_reference_with_attributes *key_ref_t;
 
 static inline key_ref_t make_key_ref(const struct key *key,
-                                    unsigned long possession)
+                                    bool possession)
 {
        return (key_ref_t) ((unsigned long) key | possession);
 }
@@ -109,7 +116,7 @@ static inline struct key *key_ref_to_ptr(const key_ref_t key_ref)
        return (struct key *) ((unsigned long) key_ref & ~1UL);
 }
 
-static inline unsigned long is_key_possessed(const key_ref_t key_ref)
+static inline bool is_key_possessed(const key_ref_t key_ref)
 {
        return (unsigned long) key_ref & 1UL;
 }
@@ -129,7 +136,6 @@ struct key {
                struct list_head graveyard_link;
                struct rb_node  serial_node;
        };
-       struct key_type         *type;          /* type of key */
        struct rw_semaphore     sem;            /* change vs change sem */
        struct key_user         *user;          /* owner of this key */
        void                    *security;      /* security data for this key */
@@ -162,13 +168,21 @@ struct key {
 #define KEY_FLAG_NEGATIVE      5       /* set if key is negative */
 #define KEY_FLAG_ROOT_CAN_CLEAR        6       /* set if key can be cleared by root without permission */
 #define KEY_FLAG_INVALIDATED   7       /* set if key has been invalidated */
+#define KEY_FLAG_TRUSTED       8       /* set if key is trusted */
+#define KEY_FLAG_TRUSTED_ONLY  9       /* set if keyring only accepts links to trusted keys */
 
-       /* the description string
-        * - this is used to match a key against search criteria
-        * - this should be a printable string
+       /* the key type and key description string
+        * - the desc is used to match a key against search criteria
+        * - it should be a printable string
         * - eg: for krb5 AFS, this might be "afs@REDHAT.COM"
         */
-       char                    *description;
+       union {
+               struct keyring_index_key index_key;
+               struct {
+                       struct key_type *type;          /* type of key */
+                       char            *description;
+               };
+       };
 
        /* type specific data
         * - this is used by the keyring type to index the name
@@ -185,11 +199,14 @@ struct key {
         *   whatever
         */
        union {
-               unsigned long           value;
-               void __rcu              *rcudata;
-               void                    *data;
-               struct keyring_list __rcu *subscriptions;
-       } payload;
+               union {
+                       unsigned long           value;
+                       void __rcu              *rcudata;
+                       void                    *data;
+                       void                    *data2[2];
+               } payload;
+               struct assoc_array keys;
+       };
 };
 
 extern struct key *key_alloc(struct key_type *type,
@@ -203,18 +220,23 @@ extern struct key *key_alloc(struct key_type *type,
 #define KEY_ALLOC_IN_QUOTA     0x0000  /* add to quota, reject if would overrun */
 #define KEY_ALLOC_QUOTA_OVERRUN        0x0001  /* add to quota, permit even if overrun */
 #define KEY_ALLOC_NOT_IN_QUOTA 0x0002  /* not in quota */
+#define KEY_ALLOC_TRUSTED      0x0004  /* Key should be flagged as trusted */
 
 extern void key_revoke(struct key *key);
 extern void key_invalidate(struct key *key);
 extern void key_put(struct key *key);
 
-static inline struct key *key_get(struct key *key)
+static inline struct key *__key_get(struct key *key)
 {
-       if (key)
-               atomic_inc(&key->usage);
+       atomic_inc(&key->usage);
        return key;
 }
 
+static inline struct key *key_get(struct key *key)
+{
+       return key ? __key_get(key) : key;
+}
+
 static inline void key_ref_put(key_ref_t key_ref)
 {
        key_put(key_ref_to_ptr(key_ref));
index 9d37e2b9d3ec030f026117d99de94dfd50b32ab0..5623a7f965b7bbb07ab94a72351aec027ef4adb7 100644 (file)
@@ -1052,17 +1052,25 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  * @xfrm_policy_delete_security:
  *     @ctx contains the xfrm_sec_ctx.
  *     Authorize deletion of xp->security.
- * @xfrm_state_alloc_security:
+ * @xfrm_state_alloc:
  *     @x contains the xfrm_state being added to the Security Association
  *     Database by the XFRM system.
  *     @sec_ctx contains the security context information being provided by
  *     the user-level SA generation program (e.g., setkey or racoon).
- *     @secid contains the secid from which to take the mls portion of the context.
  *     Allocate a security structure to the x->security field; the security
  *     field is initialized to NULL when the xfrm_state is allocated. Set the
- *     context to correspond to either sec_ctx or polsec, with the mls portion
- *     taken from secid in the latter case.
- *     Return 0 if operation was successful (memory to allocate, legal context).
+ *     context to correspond to sec_ctx. Return 0 if operation was successful
+ *     (memory to allocate, legal context).
+ * @xfrm_state_alloc_acquire:
+ *     @x contains the xfrm_state being added to the Security Association
+ *     Database by the XFRM system.
+ *     @polsec contains the policy's security context.
+ *     @secid contains the secid from which to take the mls portion of the
+ *     context.
+ *     Allocate a security structure to the x->security field; the security
+ *     field is initialized to NULL when the xfrm_state is allocated. Set the
+ *     context to correspond to secid. Return 0 if operation was successful
+ *     (memory to allocate, legal context).
  * @xfrm_state_free_security:
  *     @x contains the xfrm_state.
  *     Deallocate x->security.
@@ -1679,9 +1687,11 @@ struct security_operations {
        int (*xfrm_policy_clone_security) (struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctx);
        void (*xfrm_policy_free_security) (struct xfrm_sec_ctx *ctx);
        int (*xfrm_policy_delete_security) (struct xfrm_sec_ctx *ctx);
-       int (*xfrm_state_alloc_security) (struct xfrm_state *x,
-               struct xfrm_user_sec_ctx *sec_ctx,
-               u32 secid);
+       int (*xfrm_state_alloc) (struct xfrm_state *x,
+                                struct xfrm_user_sec_ctx *sec_ctx);
+       int (*xfrm_state_alloc_acquire) (struct xfrm_state *x,
+                                        struct xfrm_sec_ctx *polsec,
+                                        u32 secid);
        void (*xfrm_state_free_security) (struct xfrm_state *x);
        int (*xfrm_state_delete_security) (struct xfrm_state *x);
        int (*xfrm_policy_lookup) (struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir);
index 4db29859464f3af3d78caafa41a1e7525034a589..4836ba3c1cd8266c294b9dfd28aa6d0d433db0d6 100644 (file)
@@ -27,6 +27,12 @@ struct user_namespace {
        kuid_t                  owner;
        kgid_t                  group;
        unsigned int            proc_inum;
+
+       /* Register of per-UID persistent keyrings for this namespace */
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+       struct key              *persistent_keyring_register;
+       struct rw_semaphore     persistent_keyring_register_sem;
+#endif
 };
 
 extern struct user_namespace init_user_ns;
index c9b7f4faf97aa5a790f6b58fd6d9da25ca2ebd64..840cb990abe2e7147ec89c92ef143e86a35ce0f8 100644 (file)
@@ -56,5 +56,6 @@
 #define KEYCTL_REJECT                  19      /* reject a partially constructed key */
 #define KEYCTL_INSTANTIATE_IOV         20      /* instantiate a partially constructed key */
 #define KEYCTL_INVALIDATE              21      /* invalidate a key */
+#define KEYCTL_GET_PERSISTENT          22      /* get a user's persistent keyring */
 
 #endif /*  _LINUX_KEYCTL_H */
index d9887456007a83b212eb41dabf95f7d8c781c6b4..73766535ded5bc0a230248db7f7552bb8df7c53d 100644 (file)
@@ -1668,6 +1668,18 @@ config BASE_SMALL
        default 0 if BASE_FULL
        default 1 if !BASE_FULL
 
+config SYSTEM_TRUSTED_KEYRING
+       bool "Provide system-wide ring of trusted keys"
+       depends on KEYS
+       help
+         Provide a system keyring to which trusted keys can be added.  Keys in
+         the keyring are considered to be trusted.  Keys may be added at will
+         by the kernel from compiled-in data and from hardware key stores, but
+         userspace may only add extra keys if those keys can be verified by
+         keys already in the keyring.
+
+         Keys in this keyring are used by module signature checking.
+
 menuconfig MODULES
        bool "Enable loadable module support"
        option modules
@@ -1741,6 +1753,7 @@ config MODULE_SRCVERSION_ALL
 config MODULE_SIG
        bool "Module signature verification"
        depends on MODULES
+       select SYSTEM_TRUSTED_KEYRING
        select KEYS
        select CRYPTO
        select ASYMMETRIC_KEY_TYPE
index 1ce47553fb020e97e2a930309242c2c1e5161437..63136989c132e6095320547098e157878fd1942a 100644 (file)
@@ -54,8 +54,9 @@ obj-$(CONFIG_SMP) += spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_UID16) += uid16.o
+obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o
 obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o modsign_certificate.o
+obj-$(CONFIG_MODULE_SIG) += module_signing.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
@@ -141,19 +142,52 @@ targets += timeconst.h
 $(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE
        $(call if_changed,bc)
 
-ifeq ($(CONFIG_MODULE_SIG),y)
+###############################################################################
+#
+# Roll all the X.509 certificates that we can find together and pull them into
+# the kernel so that they get loaded into the system trusted keyring during
+# boot.
 #
-# Pull the signing certificate and any extra certificates into the kernel
+# We look in the source root and the build root for all files whose name ends
+# in ".x509".  Unfortunately, this will generate duplicate filenames, so we
+# have make canonicalise the pathnames and then sort them to discard the
+# duplicates.
 #
+###############################################################################
+ifeq ($(CONFIG_SYSTEM_TRUSTED_KEYRING),y)
+X509_CERTIFICATES-y := $(wildcard *.x509) $(wildcard $(srctree)/*.x509)
+X509_CERTIFICATES-$(CONFIG_MODULE_SIG) += signing_key.x509
+X509_CERTIFICATES := $(sort $(foreach CERT,$(X509_CERTIFICATES-y), \
+                               $(or $(realpath $(CERT)),$(CERT))))
+
+ifeq ($(X509_CERTIFICATES),)
+$(warning *** No X.509 certificates found ***)
+endif
+
+ifneq ($(wildcard $(obj)/.x509.list),)
+ifneq ($(shell cat $(obj)/.x509.list),$(X509_CERTIFICATES))
+$(info X.509 certificate list changed)
+$(shell rm $(obj)/.x509.list)
+endif
+endif
+
+kernel/system_certificates.o: $(obj)/x509_certificate_list
 
-quiet_cmd_touch = TOUCH   $@
-      cmd_touch = touch   $@
+quiet_cmd_x509certs  = CERTS   $@
+      cmd_x509certs  = cat $(X509_CERTIFICATES) /dev/null >$@ $(foreach X509,$(X509_CERTIFICATES),; echo "  - Including cert $(X509)")
 
-extra_certificates:
-       $(call cmd,touch)
+targets += $(obj)/x509_certificate_list
+$(obj)/x509_certificate_list: $(X509_CERTIFICATES) $(obj)/.x509.list
+       $(call if_changed,x509certs)
 
-kernel/modsign_certificate.o: signing_key.x509 extra_certificates
+targets += $(obj)/.x509.list
+$(obj)/.x509.list:
+       @echo $(X509_CERTIFICATES) >$@
 
+clean-files := x509_certificate_list .x509.list
+endif
+
+ifeq ($(CONFIG_MODULE_SIG),y)
 ###############################################################################
 #
 # If module signing is requested, say by allyesconfig, but a key has not been
diff --git a/kernel/modsign_certificate.S b/kernel/modsign_certificate.S
deleted file mode 100644 (file)
index 4a9a86d..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-#include <linux/export.h>
-
-#define GLOBAL(name)   \
-       .globl VMLINUX_SYMBOL(name);    \
-       VMLINUX_SYMBOL(name):
-
-       .section ".init.data","aw"
-
-GLOBAL(modsign_certificate_list)
-       .incbin "signing_key.x509"
-       .incbin "extra_certificates"
-GLOBAL(modsign_certificate_list_end)
diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c
deleted file mode 100644 (file)
index 7cbd450..0000000
+++ /dev/null
@@ -1,104 +0,0 @@
-/* Public keys for module signature verification
- *
- * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/cred.h>
-#include <linux/err.h>
-#include <keys/asymmetric-type.h>
-#include "module-internal.h"
-
-struct key *modsign_keyring;
-
-extern __initconst const u8 modsign_certificate_list[];
-extern __initconst const u8 modsign_certificate_list_end[];
-
-/*
- * We need to make sure ccache doesn't cache the .o file as it doesn't notice
- * if modsign.pub changes.
- */
-static __initconst const char annoy_ccache[] = __TIME__ "foo";
-
-/*
- * Load the compiled-in keys
- */
-static __init int module_verify_init(void)
-{
-       pr_notice("Initialise module verification\n");
-
-       modsign_keyring = keyring_alloc(".module_sign",
-                                       KUIDT_INIT(0), KGIDT_INIT(0),
-                                       current_cred(),
-                                       ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
-                                        KEY_USR_VIEW | KEY_USR_READ),
-                                       KEY_ALLOC_NOT_IN_QUOTA, NULL);
-       if (IS_ERR(modsign_keyring))
-               panic("Can't allocate module signing keyring\n");
-
-       return 0;
-}
-
-/*
- * Must be initialised before we try and load the keys into the keyring.
- */
-device_initcall(module_verify_init);
-
-/*
- * Load the compiled-in keys
- */
-static __init int load_module_signing_keys(void)
-{
-       key_ref_t key;
-       const u8 *p, *end;
-       size_t plen;
-
-       pr_notice("Loading module verification certificates\n");
-
-       end = modsign_certificate_list_end;
-       p = modsign_certificate_list;
-       while (p < end) {
-               /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
-                * than 256 bytes in size.
-                */
-               if (end - p < 4)
-                       goto dodgy_cert;
-               if (p[0] != 0x30 &&
-                   p[1] != 0x82)
-                       goto dodgy_cert;
-               plen = (p[2] << 8) | p[3];
-               plen += 4;
-               if (plen > end - p)
-                       goto dodgy_cert;
-
-               key = key_create_or_update(make_key_ref(modsign_keyring, 1),
-                                          "asymmetric",
-                                          NULL,
-                                          p,
-                                          plen,
-                                          (KEY_POS_ALL & ~KEY_POS_SETATTR) |
-                                          KEY_USR_VIEW,
-                                          KEY_ALLOC_NOT_IN_QUOTA);
-               if (IS_ERR(key))
-                       pr_err("MODSIGN: Problem loading in-kernel X.509 certificate (%ld)\n",
-                              PTR_ERR(key));
-               else
-                       pr_notice("MODSIGN: Loaded cert '%s'\n",
-                                 key_ref_to_ptr(key)->description);
-               p += plen;
-       }
-
-       return 0;
-
-dodgy_cert:
-       pr_err("MODSIGN: Problem parsing in-kernel X.509 certificate list\n");
-       return 0;
-}
-late_initcall(load_module_signing_keys);
index 24f9247b7d0214d4b3755bb4396f14d9ef712753..915e123a430fbb0cb6ea9197b90ab6ec3684a191 100644 (file)
@@ -9,6 +9,4 @@
  * 2 of the Licence, or (at your option) any later version.
  */
 
-extern struct key *modsign_keyring;
-
 extern int mod_verify_sig(const void *mod, unsigned long *_modlen);
index f2970bddc5ea6224b8c0357970a543c90ac11da0..0b6b870dc5e4fe642fe0b149a4c6d77bb75861b4 100644 (file)
@@ -14,6 +14,7 @@
 #include <crypto/public_key.h>
 #include <crypto/hash.h>
 #include <keys/asymmetric-type.h>
+#include <keys/system_keyring.h>
 #include "module-internal.h"
 
 /*
@@ -54,7 +55,7 @@ static struct public_key_signature *mod_make_digest(enum pkey_hash_algo hash,
        /* Allocate the hashing algorithm we're going to need and find out how
         * big the hash operational data will be.
         */
-       tfm = crypto_alloc_shash(pkey_hash_algo[hash], 0, 0);
+       tfm = crypto_alloc_shash(pkey_hash_algo_name[hash], 0, 0);
        if (IS_ERR(tfm))
                return (PTR_ERR(tfm) == -ENOENT) ? ERR_PTR(-ENOPKG) : ERR_CAST(tfm);
 
@@ -157,7 +158,7 @@ static struct key *request_asymmetric_key(const char *signer, size_t signer_len,
 
        pr_debug("Look up: \"%s\"\n", id);
 
-       key = keyring_search(make_key_ref(modsign_keyring, 1),
+       key = keyring_search(make_key_ref(system_trusted_keyring, 1),
                             &key_type_asymmetric, id);
        if (IS_ERR(key))
                pr_warn("Request for unknown module key '%s' err %ld\n",
@@ -217,7 +218,7 @@ int mod_verify_sig(const void *mod, unsigned long *_modlen)
                return -ENOPKG;
 
        if (ms.hash >= PKEY_HASH__LAST ||
-           !pkey_hash_algo[ms.hash])
+           !pkey_hash_algo_name[ms.hash])
                return -ENOPKG;
 
        key = request_asymmetric_key(sig, ms.signer_len,
diff --git a/kernel/system_certificates.S b/kernel/system_certificates.S
new file mode 100644 (file)
index 0000000..552d47b
--- /dev/null
@@ -0,0 +1,12 @@
+#include <linux/export.h>
+#include <linux/init.h>
+
+#define GLOBAL(name)   \
+       .globl VMLINUX_SYMBOL(name);    \
+       VMLINUX_SYMBOL(name):
+
+       __INITRODATA
+
+GLOBAL(system_certificate_list)
+       .incbin "kernel/x509_certificate_list"
+GLOBAL(system_certificate_list_end)
diff --git a/kernel/system_keyring.c b/kernel/system_keyring.c
new file mode 100644 (file)
index 0000000..564dd93
--- /dev/null
@@ -0,0 +1,105 @@
+/* System trusted keyring for trusted public keys
+ *
+ * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/cred.h>
+#include <linux/err.h>
+#include <keys/asymmetric-type.h>
+#include <keys/system_keyring.h>
+#include "module-internal.h"
+
+struct key *system_trusted_keyring;
+EXPORT_SYMBOL_GPL(system_trusted_keyring);
+
+extern __initconst const u8 system_certificate_list[];
+extern __initconst const u8 system_certificate_list_end[];
+
+/*
+ * Allocate the system trusted keyring and flag it as trusted-keys-only.
+ */
+static __init int system_trusted_keyring_init(void)
+{
+       pr_notice("Initialise system trusted keyring\n");
+
+       system_trusted_keyring =
+               keyring_alloc(".system_keyring",
+                             KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
+                             ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                             KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH),
+                             KEY_ALLOC_NOT_IN_QUOTA, NULL);
+       if (IS_ERR(system_trusted_keyring))
+               panic("Can't allocate system trusted keyring\n");
+
+       set_bit(KEY_FLAG_TRUSTED_ONLY, &system_trusted_keyring->flags);
+       return 0;
+}
+
+/*
+ * Must be initialised before we try and load the keys into the keyring.
+ */
+device_initcall(system_trusted_keyring_init);
+
+/*
+ * Load each compiled-in X.509 certificate into the system trusted keyring.
+ */
+static __init int load_system_certificate_list(void)
+{
+       key_ref_t key;
+       const u8 *p, *end;
+       size_t plen;
+
+       pr_notice("Loading compiled-in X.509 certificates\n");
+
+       end = system_certificate_list_end;
+       p = system_certificate_list;
+       while (p < end) {
+               /* Each cert must begin with an ASN.1 SEQUENCE tag (0x30) and a
+                * 2-byte length marker (0x82), so must be more than 256 bytes in size.
+                */
+               if (end - p < 4)
+                       goto dodgy_cert;
+               if (p[0] != 0x30 ||
+                   p[1] != 0x82)
+                       goto dodgy_cert;
+               plen = (p[2] << 8) | p[3];
+               plen += 4;
+               if (plen > end - p)
+                       goto dodgy_cert;
+
+               key = key_create_or_update(make_key_ref(system_trusted_keyring, 1),
+                                          "asymmetric",
+                                          NULL,
+                                          p,
+                                          plen,
+                                          ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                                          KEY_USR_VIEW | KEY_USR_READ),
+                                          KEY_ALLOC_NOT_IN_QUOTA |
+                                          KEY_ALLOC_TRUSTED);
+               if (IS_ERR(key)) {
+                       pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
+                              PTR_ERR(key));
+               } else {
+                       pr_notice("Loaded X.509 cert '%s'\n",
+                                 key_ref_to_ptr(key)->description);
+                       key_ref_put(key);
+               }
+               p += plen;
+       }
+
+       return 0;
+
+dodgy_cert:
+       pr_err("Problem parsing in-kernel X.509 certificate list\n");
+       return 0;
+}
+late_initcall(load_system_certificate_list);
index 5bbb91988e69278f2cd012896db29688c9234476..a3a0dbfda32957616f143ae2722541a5846c0a62 100644 (file)
@@ -51,6 +51,10 @@ struct user_namespace init_user_ns = {
        .owner = GLOBAL_ROOT_UID,
        .group = GLOBAL_ROOT_GID,
        .proc_inum = PROC_USER_INIT_INO,
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+       .persistent_keyring_register_sem =
+       __RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem),
+#endif
 };
 EXPORT_SYMBOL_GPL(init_user_ns);
 
index 13fb1134ba582e49c8aa3643feada72a2b0dae8b..240fb62cf3945aa0f7b601b343db65312a42f345 100644 (file)
@@ -101,6 +101,9 @@ int create_user_ns(struct cred *new)
 
        set_cred_user_ns(new, ns);
 
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+       init_rwsem(&ns->persistent_keyring_register_sem);
+#endif
        return 0;
 }
 
@@ -130,6 +133,9 @@ void free_user_ns(struct user_namespace *ns)
 
        do {
                parent = ns->parent;
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+               key_put(ns->persistent_keyring_register);
+#endif
                proc_free_inum(ns->proc_inum);
                kmem_cache_free(user_ns_cachep, ns);
                ns = parent;
index b3c8be0da17f8472429ad79787c76d42fa62eb02..3cb879b1f28276e9f8c98dc0670caaa0dcec48df 100644 (file)
@@ -322,6 +322,20 @@ config TEXTSEARCH_FSM
 config BTREE
        boolean
 
+config ASSOCIATIVE_ARRAY
+       bool
+       help
+         Generic associative array.  Can be searched and iterated over whilst
+         it is being modified.  It is also reasonably quick to search and
+         modify.  The algorithms are non-recursive, and the trees are highly
+         capacious.
+
+         See:
+
+               Documentation/assoc_array.txt
+
+         for more information.
+
 config HAS_IOMEM
        boolean
        depends on !NO_IOMEM
index f3bb2cb98adfd2a631144ecbde775f6f224bea4e..1e806477e472d00fab1295004b82d9986b11f09e 100644 (file)
@@ -51,6 +51,7 @@ CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 
 obj-$(CONFIG_BTREE) += btree.o
+obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
diff --git a/lib/assoc_array.c b/lib/assoc_array.c
new file mode 100644 (file)
index 0000000..17edeaf
--- /dev/null
@@ -0,0 +1,1746 @@
+/* Generic associative array implementation.
+ *
+ * See Documentation/assoc_array.txt for information.
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+//#define DEBUG
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/assoc_array_priv.h>
+
+/*
+ * Iterate over an associative array.  The caller must hold the RCU read lock
+ * or better.
+ *
+ * The walk starts at @root and calls @iterator on every leaf pointer it
+ * finds, handing @iterator_data through unchanged.  The first non-zero value
+ * returned by @iterator is returned immediately; otherwise 0 is returned once
+ * a back pointer equal to @stop is met on the way back up.
+ */
+static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root,
+                                      const struct assoc_array_ptr *stop,
+                                      int (*iterator)(const void *leaf,
+                                                      void *iterator_data),
+                                      void *iterator_data)
+{
+       const struct assoc_array_shortcut *shortcut;
+       const struct assoc_array_node *node;
+       const struct assoc_array_ptr *cursor, *ptr, *parent;
+       unsigned long has_meta;
+       int slot, ret;
+
+       cursor = root;
+
+begin_node:
+       if (assoc_array_ptr_is_shortcut(cursor)) {
+               /* Descend through a shortcut */
+               shortcut = assoc_array_ptr_to_shortcut(cursor);
+               smp_read_barrier_depends();
+               cursor = ACCESS_ONCE(shortcut->next_node);
+       }
+
+       node = assoc_array_ptr_to_node(cursor);
+       smp_read_barrier_depends();
+       slot = 0;
+
+       /* We perform two passes of each node.
+        *
+        * The first pass does all the leaves in this node.  This means we
+        * don't miss any leaves if the node is split up by insertion whilst
+        * we're iterating over the branches rooted here (we may, however, see
+        * some leaves twice).
+        *
+        * Whilst doing so, the bits of every pointer seen are ORed together
+        * in has_meta so that the second pass can be skipped if no pointer
+        * had the metadata type bit set.
+        */
+       has_meta = 0;
+       for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               ptr = ACCESS_ONCE(node->slots[slot]);
+               has_meta |= (unsigned long)ptr;
+               if (ptr && assoc_array_ptr_is_leaf(ptr)) {
+                       /* We need a barrier between the read of the pointer
+                        * and dereferencing the pointer - but only if we are
+                        * actually going to dereference it.
+                        */
+                       smp_read_barrier_depends();
+
+                       /* Invoke the callback */
+                       ret = iterator(assoc_array_ptr_to_leaf(ptr),
+                                      iterator_data);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       /* The second pass attends to all the metadata pointers.  If we follow
+        * one of these we may find that we don't come back here, but rather go
+        * back to a replacement node with the leaves in a different layout.
+        *
+        * We are guaranteed to make progress, however, as the slot number for
+        * a particular portion of the key space cannot change - and we
+        * continue at the back pointer + 1.
+        */
+       if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE))
+               goto finished_node;
+       slot = 0;
+
+continue_node:
+       node = assoc_array_ptr_to_node(cursor);
+       smp_read_barrier_depends();
+
+       for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               ptr = ACCESS_ONCE(node->slots[slot]);
+               if (assoc_array_ptr_is_meta(ptr)) {
+                       cursor = ptr;
+                       goto begin_node;
+               }
+       }
+
+finished_node:
+       /* Move up to the parent (may need to skip back over a shortcut) */
+       parent = ACCESS_ONCE(node->back_pointer);
+       slot = node->parent_slot;
+       if (parent == stop)
+               return 0;
+
+       if (assoc_array_ptr_is_shortcut(parent)) {
+               shortcut = assoc_array_ptr_to_shortcut(parent);
+               smp_read_barrier_depends();
+               cursor = parent;
+               parent = ACCESS_ONCE(shortcut->back_pointer);
+               slot = shortcut->parent_slot; /* The shortcut's slot in its own parent */
+               if (parent == stop)
+                       return 0;
+       }
+
+       /* Ascend to next slot in parent node */
+       cursor = parent;
+       slot++; /* Resume just past the slot we descended through */
+       goto continue_node;
+}
+
+/**
+ * assoc_array_iterate - Pass all objects in the array to a callback
+ * @array: The array to iterate over.
+ * @iterator: The callback function.
+ * @iterator_data: Private data for the callback function.
+ *
+ * Present every object held in an associative array to the iterator
+ * function, one at a time.
+ *
+ * Should the array be modified whilst the iteration is in progress, an object
+ * may be handed to the callback more than once, though each object should be
+ * seen at least once.  A caller that cannot tolerate repeats must lock out
+ * modification for the duration of the call.
+ *
+ * Returns 0 if the array held no objects; otherwise returns whatever the last
+ * call to the iterator function returned.  A non-zero return from the
+ * iterator function ends the iteration at once.
+ *
+ * The caller should hold the RCU read lock or better if concurrent
+ * modification is possible.
+ */
+int assoc_array_iterate(const struct assoc_array *array,
+                       int (*iterator)(const void *object,
+                                       void *iterator_data),
+                       void *iterator_data)
+{
+       struct assoc_array_ptr *top;
+
+       /* Sample the root pointer exactly once; an empty array has nothing
+        * to present to the callback.
+        */
+       top = ACCESS_ONCE(array->root);
+       if (top)
+               return assoc_array_subtree_iterate(top, NULL, iterator,
+                                                  iterator_data);
+       return 0;
+}
+
+/*
+ * Outcome of assoc_array_walk().  The value tells the caller which member of
+ * struct assoc_array_walk_result, if any, was filled in.
+ */
+enum assoc_array_walk_status {
+       assoc_array_walk_tree_empty,
+       assoc_array_walk_found_terminal_node,
+       assoc_array_walk_found_wrong_shortcut,
+};
+
+struct assoc_array_walk_result {
+       struct {
+               struct assoc_array_node *node;  /* Node in which leaf might be found */
+               int             level;  /* Key level the walk had reached at that node */
+               int             slot;   /* Slot in that node selected by the index key */
+       } terminal_node;
+       struct {
+               struct assoc_array_shortcut *shortcut;  /* Shortcut whose key didn't match */
+               int             level;  /* Key level at which the shortcut was met */
+               int             sc_level;       /* Level of the key word that mismatched */
+               unsigned long   sc_segments;    /* Shortcut's stored key word at sc_level */
+               unsigned long   dissimilarity;  /* Index key word XOR sc_segments, trimmed */
+       } wrong_shortcut;
+};
+
+/*
+ * Navigate through the internal tree looking for the closest node to the key.
+ *
+ * Returns assoc_array_walk_tree_empty if the array has no root, in which case
+ * @result is not written.  Returns assoc_array_walk_found_terminal_node with
+ * result->terminal_node filled in when the slot selected by the index key
+ * doesn't hold a metadata pointer.  Returns
+ * assoc_array_walk_found_wrong_shortcut with result->wrong_shortcut filled in
+ * when the index key differs from the key stored in a shortcut on the route.
+ */
+static enum assoc_array_walk_status
+assoc_array_walk(const struct assoc_array *array,
+                const struct assoc_array_ops *ops,
+                const void *index_key,
+                struct assoc_array_walk_result *result)
+{
+       struct assoc_array_shortcut *shortcut;
+       struct assoc_array_node *node;
+       struct assoc_array_ptr *cursor, *ptr;
+       unsigned long sc_segments, dissimilarity;
+       unsigned long segments;
+       int level, sc_level, next_sc_level;
+       int slot;
+
+       pr_devel("-->%s()\n", __func__);
+
+       cursor = ACCESS_ONCE(array->root);
+       if (!cursor)
+               return assoc_array_walk_tree_empty;
+
+       level = 0;
+
+       /* Use segments from the key for the new leaf to navigate through the
+        * internal tree, skipping through nodes and shortcuts that are on
+        * route to the destination.  Eventually we'll come to a slot that is
+        * either empty or contains a leaf at which point we've found a node in
+        * which the leaf we're looking for might be found or into which it
+        * should be inserted.
+        *
+        * We come back to "jumped" whenever the level has moved into a new
+        * key chunk so that the chunk is refetched; "consider_node" is used
+        * when the chunk already held in segments still covers the level.
+        */
+jumped:
+       segments = ops->get_key_chunk(index_key, level);
+       pr_devel("segments[%d]: %lx\n", level, segments);
+
+       if (assoc_array_ptr_is_shortcut(cursor))
+               goto follow_shortcut;
+
+consider_node:
+       node = assoc_array_ptr_to_node(cursor);
+       smp_read_barrier_depends();
+
+       slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); /* Bring this level's bits to the bottom */
+       slot &= ASSOC_ARRAY_FAN_MASK;
+       ptr = ACCESS_ONCE(node->slots[slot]);
+
+       pr_devel("consider slot %x [ix=%d type=%lu]\n",
+                slot, level, (unsigned long)ptr & 3);
+
+       if (!assoc_array_ptr_is_meta(ptr)) {
+               /* The node doesn't have a node/shortcut pointer in the slot
+                * corresponding to the index key that we have to follow.
+                */
+               result->terminal_node.node = node;
+               result->terminal_node.level = level;
+               result->terminal_node.slot = slot;
+               pr_devel("<--%s() = terminal_node\n", __func__);
+               return assoc_array_walk_found_terminal_node;
+       }
+
+       if (assoc_array_ptr_is_node(ptr)) {
+               /* There is a pointer to a node in the slot corresponding to
+                * this index key segment, so we need to follow it.
+                */
+               cursor = ptr;
+               level += ASSOC_ARRAY_LEVEL_STEP;
+               if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0)
+                       goto consider_node;
+               goto jumped;
+       }
+
+       /* There is a shortcut in the slot corresponding to the index key
+        * segment.  We follow the shortcut if its partial index key matches
+        * this leaf's.  Otherwise we need to split the shortcut.
+        */
+       cursor = ptr;
+follow_shortcut:
+       shortcut = assoc_array_ptr_to_shortcut(cursor);
+       smp_read_barrier_depends();
+       pr_devel("shortcut to %d\n", shortcut->skip_to_level);
+       sc_level = level + ASSOC_ARRAY_LEVEL_STEP;
+       BUG_ON(sc_level > shortcut->skip_to_level);
+
+       do {
+               /* Check the leaf against the shortcut's index key a word at a
+                * time, trimming the final word (the shortcut stores the index
+                * key completely from the root to the shortcut's target).
+                */
+               if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0)
+                       segments = ops->get_key_chunk(index_key, sc_level);
+
+               sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT];
+               dissimilarity = segments ^ sc_segments;
+
+               if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) {
+                       /* Trim segments that are beyond the shortcut */
+                       int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+                       dissimilarity &= ~(ULONG_MAX << shift);
+                       next_sc_level = shortcut->skip_to_level;
+               } else {
+                       next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE;
+                       next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+               }
+
+               if (dissimilarity != 0) {
+                       /* This shortcut points elsewhere */
+                       result->wrong_shortcut.shortcut = shortcut;
+                       result->wrong_shortcut.level = level;
+                       result->wrong_shortcut.sc_level = sc_level;
+                       result->wrong_shortcut.sc_segments = sc_segments;
+                       result->wrong_shortcut.dissimilarity = dissimilarity;
+                       return assoc_array_walk_found_wrong_shortcut;
+               }
+
+               sc_level = next_sc_level;
+       } while (sc_level < shortcut->skip_to_level);
+
+       /* The shortcut matches the leaf's index to this point.  If following
+        * it moved us into a different key chunk, the chunk must be refetched
+        * before the next node is considered.
+        */
+       cursor = ACCESS_ONCE(shortcut->next_node);
+       if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) {
+               level = sc_level;
+               goto jumped;
+       } else {
+               level = sc_level;
+               goto consider_node;
+       }
+}
+
+/**
+ * assoc_array_find - Find an object by index key
+ * @array: The associative array to search.
+ * @ops: The operations to use.
+ * @index_key: The key to the object.
+ *
+ * Find an object in an associative array by walking through the internal tree
+ * to the node that should contain the object and then searching the leaves
+ * there.  NULL is returned if the requested object was not found in the array;
+ * this includes the cases in which the walk reports an empty tree or a
+ * shortcut that doesn't match the key.
+ *
+ * The caller must hold the RCU read lock or better.
+ */
+void *assoc_array_find(const struct assoc_array *array,
+                      const struct assoc_array_ops *ops,
+                      const void *index_key)
+{
+       struct assoc_array_walk_result result;
+       const struct assoc_array_node *node;
+       const struct assoc_array_ptr *ptr;
+       const void *leaf;
+       int slot;
+
+       if (assoc_array_walk(array, ops, index_key, &result) !=
+           assoc_array_walk_found_terminal_node)
+               return NULL;
+
+       node = result.terminal_node.node;
+       smp_read_barrier_depends();
+
+       /* If the target key is available to us, it has to be pointed to by
+        * the terminal node.  Every slot is checked rather than just the one
+        * the walk selected.
+        */
+       for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               ptr = ACCESS_ONCE(node->slots[slot]);
+               if (ptr && assoc_array_ptr_is_leaf(ptr)) {
+                       /* We need a barrier between the read of the pointer
+                        * and dereferencing the pointer - but only if we are
+                        * actually going to dereference it.
+                        */
+                       leaf = assoc_array_ptr_to_leaf(ptr);
+                       smp_read_barrier_depends();
+                       if (ops->compare_object(leaf, index_key))
+                               return (void *)leaf; /* Drop const for the caller */
+               }
+       }
+
+       return NULL;
+}
+
+/*
+ * Destructively iterate over an associative array.  The caller must prevent
+ * other simultaneous accesses.
+ *
+ * Starting from @root, every node and shortcut reached is released with
+ * kfree() once the slots beneath it have been dealt with.  Each leaf is
+ * passed to ops->free_object() unless @ops is NULL, in which case leaves are
+ * left alone.  A NULL @root is accepted and nothing is done.
+ *
+ * The walk stops when it frees a node or shortcut whose back pointer is NULL.
+ */
+static void assoc_array_destroy_subtree(struct assoc_array_ptr *root,
+                                       const struct assoc_array_ops *ops)
+{
+       struct assoc_array_shortcut *shortcut;
+       struct assoc_array_node *node;
+       struct assoc_array_ptr *cursor, *parent = NULL;
+       int slot = -1; /* -1 means there is no parent slot to check against */
+
+       pr_devel("-->%s()\n", __func__);
+
+       cursor = root;
+       if (!cursor) {
+               pr_devel("empty\n");
+               return;
+       }
+
+move_to_meta:
+       if (assoc_array_ptr_is_shortcut(cursor)) {
+               /* Descend through a shortcut */
+               pr_devel("[%d] shortcut\n", slot);
+               BUG_ON(!assoc_array_ptr_is_shortcut(cursor));
+               shortcut = assoc_array_ptr_to_shortcut(cursor);
+               BUG_ON(shortcut->back_pointer != parent);
+               BUG_ON(slot != -1 && shortcut->parent_slot != slot);
+               parent = cursor;
+               cursor = shortcut->next_node;
+               slot = -1;
+               BUG_ON(!assoc_array_ptr_is_node(cursor));
+       }
+
+       pr_devel("[%d] node\n", slot);
+       node = assoc_array_ptr_to_node(cursor);
+       BUG_ON(node->back_pointer != parent);
+       BUG_ON(slot != -1 && node->parent_slot != slot);
+       slot = 0;
+
+continue_node:
+       pr_devel("Node %p [back=%p]\n", node, node->back_pointer);
+       for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               struct assoc_array_ptr *ptr = node->slots[slot];
+               if (!ptr)
+                       continue;
+               if (assoc_array_ptr_is_meta(ptr)) {
+                       parent = cursor;
+                       cursor = ptr;
+                       goto move_to_meta;
+               }
+
+               if (ops) {
+                       pr_devel("[%d] free leaf\n", slot);
+                       ops->free_object(assoc_array_ptr_to_leaf(ptr));
+               }
+       }
+
+       parent = node->back_pointer; /* Fetch these before the node is freed */
+       slot = node->parent_slot;
+       pr_devel("free node\n");
+       kfree(node);
+       if (!parent)
+               return; /* Done */
+
+       /* Move back up to the parent (may need to free a shortcut on
+        * the way up) */
+       if (assoc_array_ptr_is_shortcut(parent)) {
+               shortcut = assoc_array_ptr_to_shortcut(parent);
+               BUG_ON(shortcut->next_node != cursor);
+               cursor = parent;
+               parent = shortcut->back_pointer; /* Fetch these before the shortcut is freed */
+               slot = shortcut->parent_slot;
+               pr_devel("free shortcut\n");
+               kfree(shortcut);
+               if (!parent)
+                       return;
+
+               BUG_ON(!assoc_array_ptr_is_node(parent));
+       }
+
+       /* Ascend to next slot in parent node */
+       pr_devel("ascend to %p[%d]\n", parent, slot);
+       cursor = parent;
+       node = assoc_array_ptr_to_node(cursor);
+       slot++;
+       goto continue_node;
+}
+
+/**
+ * assoc_array_destroy - Destroy an associative array
+ * @array: The array to destroy.
+ * @ops: The operations to use.
+ *
+ * Free every object in an associative array and throw away all of its
+ * metadata.  On return the array is empty and may be used again.  This
+ * function cannot fail.
+ *
+ * No attempt is made to adjust pointers gracefully so that accesses made
+ * under the RCU read lock could carry on, so the caller must prevent all
+ * other accesses whilst this takes place.  On the other hand, no memory
+ * allocation is required.
+ */
+void assoc_array_destroy(struct assoc_array *array,
+                        const struct assoc_array_ops *ops)
+{
+       struct assoc_array_ptr *doomed = array->root;
+
+       /* Tear down the whole tree and then mark the array as empty */
+       assoc_array_destroy_subtree(doomed, ops);
+       array->root = NULL;
+}
+
+/*
+ * Handle insertion into an empty tree: allocate a single node to become the
+ * root and arrange for the new leaf to go into its first slot.
+ *
+ * Returns true on success or false if the node could not be allocated.
+ */
+static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit)
+{
+       struct assoc_array_node *root_node;
+       struct assoc_array_ptr *root_ptr;
+
+       pr_devel("-->%s()\n", __func__);
+
+       root_node = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+       if (root_node == NULL)
+               return false;
+       root_ptr = assoc_array_node_to_ptr(root_node);
+
+       /* Note the allocation in the edit script and point the array's root
+        * at the new node.
+        */
+       edit->new_meta[0] = root_ptr;
+       edit->set[0].ptr = &edit->array->root;
+       edit->set[0].to = root_ptr;
+
+       /* The leaf occupies the first slot of the new node */
+       edit->leaf_p = &root_node->slots[0];
+       edit->adjust_count_on = root_node;
+
+       pr_devel("<--%s() = ok [no root]\n", __func__);
+       return true;
+}
+
+/*
+ * Handle insertion into a terminal node.
+ *
+ * The walk stopped at result->terminal_node.node because the slot selected by
+ * the index key holds no node or shortcut to follow.  Depending on what the
+ * node contains, this will do one of:
+ *
+ *  (1) replace a leaf that already matches the index key;
+ *
+ *  (2) use a spare slot in the node;
+ *
+ *  (3) split the full node into a replacement node (n0) and a child node (n1)
+ *      that takes the leaves that share a slot; or
+ *
+ *  (4) put a shortcut ahead of such a pair of nodes if every leaf, old and
+ *      new, wants the same slot at this level.
+ *
+ * Only the edit script and the newly allocated nodes are written to here; the
+ * existing node is only read.
+ *
+ * Returns true if the edit script was set up or false if a memory allocation
+ * failed.  Anything allocated before the failure has already been recorded in
+ * edit->new_meta[].
+ */
+static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
+                                                 const struct assoc_array_ops *ops,
+                                                 const void *index_key,
+                                                 struct assoc_array_walk_result *result)
+{
+       struct assoc_array_shortcut *shortcut, *new_s0;
+       struct assoc_array_node *node, *new_n0, *new_n1, *side;
+       struct assoc_array_ptr *ptr;
+       unsigned long dissimilarity, base_seg, blank;
+       size_t keylen;
+       bool have_meta;
+       int level, diff;
+       int slot, next_slot, free_slot, i, j;
+
+       node    = result->terminal_node.node;
+       level   = result->terminal_node.level;
+       edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot;
+
+       pr_devel("-->%s()\n", __func__);
+
+       /* We arrived at a node which doesn't have an onward node or shortcut
+        * pointer that we have to follow.  This means that (a) the leaf we
+        * want must go here (either by insertion or replacement) or (b) we
+        * need to split this node and insert in one of the fragments.
+        */
+       free_slot = -1;
+
+       /* Firstly, we have to check the leaves in this node to see if there's
+        * a matching one we should replace in place.
+        */
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               ptr = node->slots[i];
+               if (!ptr) {
+                       free_slot = i;
+                       continue;
+               }
+               /* Only leaves may be handed to compare_object(); the other
+                * slots of this node may hold pointers to nodes or shortcuts,
+                * which are not objects.
+                */
+               if (assoc_array_ptr_is_leaf(ptr) &&
+                   ops->compare_object(assoc_array_ptr_to_leaf(ptr),
+                                       index_key)) {
+                       pr_devel("replace in slot %d\n", i);
+                       edit->leaf_p = &node->slots[i];
+                       edit->dead_leaf = node->slots[i];
+                       pr_devel("<--%s() = ok [replace]\n", __func__);
+                       return true;
+               }
+       }
+
+       /* If there is a free slot in this node then we can just insert the
+        * leaf here.
+        */
+       if (free_slot >= 0) {
+               pr_devel("insert in free slot %d\n", free_slot);
+               edit->leaf_p = &node->slots[free_slot];
+               edit->adjust_count_on = node;
+               pr_devel("<--%s() = ok [insert]\n", __func__);
+               return true;
+       }
+
+       /* The node has no spare slots - so we're either going to have to split
+        * it or insert another node before it.
+        *
+        * Whatever, we're going to need at least two new nodes - so allocate
+        * those now.  We may also need a new shortcut, but we deal with that
+        * when we need it.
+        */
+       new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+       if (!new_n0)
+               return false;
+       edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+       new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+       if (!new_n1)
+               return false;
+       edit->new_meta[1] = assoc_array_node_to_ptr(new_n1);
+
+       /* We need to find out how similar the leaves are.  The slot that each
+        * leaf wants at this level is cached, with 0xff marking a slot that
+        * holds a metadata pointer rather than a leaf.
+        */
+       pr_devel("no spare slots\n");
+       have_meta = false;
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               ptr = node->slots[i];
+               if (assoc_array_ptr_is_meta(ptr)) {
+                       edit->segment_cache[i] = 0xff;
+                       have_meta = true;
+                       continue;
+               }
+               base_seg = ops->get_object_key_chunk(
+                       assoc_array_ptr_to_leaf(ptr), level);
+               base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+               edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
+       }
+
+       if (have_meta) {
+               pr_devel("have meta\n");
+               goto split_node;
+       }
+
+       /* The node contains only leaves */
+       dissimilarity = 0;
+       base_seg = edit->segment_cache[0];
+       for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++)
+               dissimilarity |= edit->segment_cache[i] ^ base_seg;
+
+       pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity);
+
+       if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) {
+               /* The old leaves all cluster in the same slot.  We will need
+                * to insert a shortcut if the new node wants to cluster with them.
+                */
+               if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0)
+                       goto all_leaves_cluster_together;
+
+               /* Otherwise all the old leaves cluster in the same slot, but
+                * the new leaf wants to go into a different slot - so we
+                * create a new node (n0) to hold the new leaf and a pointer to
+                * a new node (n1) holding all the old leaves.
+                *
+                * This is just a particular outcome of the node splitting
+                * path below, which also copes with the node being the root
+                * or being reached through a shortcut, so fall through to it
+                * rather than special-casing it.
+                */
+               pr_devel("present leaves cluster but not new leaf\n");
+       }
+
+split_node:
+       pr_devel("split node\n");
+
+       /* We need to split the current node.  The node may contain anything
+        * from a single leaf (which must then cluster with the new leaf) plus
+        * metadata pointers through to nothing but leaves, some or all of
+        * which may cluster.
+        *
+        * What we won't see here is the case in which all the current leaves
+        * plus the new leaf want to cluster in the same slot.
+        *
+        * We need to expel at least two leaves out of a set consisting of the
+        * leaves in the node and the new leaf.  The current metadata pointers
+        * are just copied across as they keep their slots.
+        *
+        * We need a new node (n0) to replace the current one and a new node to
+        * take the expelled nodes (n1).
+        */
+       edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+       new_n0->back_pointer = node->back_pointer;
+       new_n0->parent_slot = node->parent_slot;
+       new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
+       new_n1->parent_slot = -1; /* Need to calculate this */
+
+do_split_node:
+       pr_devel("do_split_node\n");
+
+       new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
+       new_n1->nr_leaves_on_branch = 0;
+
+       /* Begin by finding two matching leaves.  There have to be at least two
+        * that match - even if there are meta pointers - because any leaf that
+        * would match a slot with a meta pointer in it must be somewhere
+        * behind that meta pointer and cannot be here.  Further, given N
+        * remaining leaf slots, we now have N+1 leaves to go in them.
+        */
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               slot = edit->segment_cache[i];
+               if (slot != 0xff)
+                       for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++)
+                               if (edit->segment_cache[j] == slot)
+                                       goto found_slot_for_multiple_occupancy;
+       }
+found_slot_for_multiple_occupancy:
+       pr_devel("same slot: %x %x [%02x]\n", i, j, slot);
+       BUG_ON(i >= ASSOC_ARRAY_FAN_OUT);
+       BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1);
+       BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT);
+
+       new_n1->parent_slot = slot;
+
+       /* Metadata pointers cannot change slot */
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
+               if (assoc_array_ptr_is_meta(node->slots[i]))
+                       new_n0->slots[i] = node->slots[i];
+               else
+                       new_n0->slots[i] = NULL;
+       BUG_ON(new_n0->slots[slot] != NULL);
+       new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1);
+
+       /* Filter the leaf pointers between the new nodes */
+       free_slot = -1;
+       next_slot = 0;
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               if (assoc_array_ptr_is_meta(node->slots[i]))
+                       continue;
+               if (edit->segment_cache[i] == slot) {
+                       new_n1->slots[next_slot++] = node->slots[i];
+                       new_n1->nr_leaves_on_branch++;
+               } else {
+                       do {
+                               free_slot++;
+                       } while (new_n0->slots[free_slot] != NULL);
+                       new_n0->slots[free_slot] = node->slots[i];
+               }
+       }
+
+       pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot);
+
+       /* Pick where the new leaf will go: beside the leaves that stayed in
+        * n0 or with those that were expelled to n1.
+        */
+       if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) {
+               do {
+                       free_slot++;
+               } while (new_n0->slots[free_slot] != NULL);
+               edit->leaf_p = &new_n0->slots[free_slot];
+               edit->adjust_count_on = new_n0;
+       } else {
+               edit->leaf_p = &new_n1->slots[next_slot++];
+               edit->adjust_count_on = new_n1;
+       }
+
+       BUG_ON(next_slot <= 1);
+
+       /* Note the back pointers of the metadata that was carried over so
+        * that they can be redirected from the old node to n0.
+        */
+       edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0);
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               if (edit->segment_cache[i] == 0xff) {
+                       ptr = node->slots[i];
+                       BUG_ON(assoc_array_ptr_is_leaf(ptr));
+                       if (assoc_array_ptr_is_node(ptr)) {
+                               side = assoc_array_ptr_to_node(ptr);
+                               edit->set_backpointers[i] = &side->back_pointer;
+                       } else {
+                               shortcut = assoc_array_ptr_to_shortcut(ptr);
+                               edit->set_backpointers[i] = &shortcut->back_pointer;
+                       }
+               }
+       }
+
+       /* Work out which pointer currently leads to the old node; it may be
+        * the array's root pointer, a slot in a parent node or a shortcut.
+        */
+       ptr = node->back_pointer;
+       if (!ptr)
+               edit->set[0].ptr = &edit->array->root;
+       else if (assoc_array_ptr_is_node(ptr))
+               edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot];
+       else
+               edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node;
+       edit->excised_meta[0] = assoc_array_node_to_ptr(node);
+       pr_devel("<--%s() = ok [split node]\n", __func__);
+       return true;
+
+all_leaves_cluster_together:
+       /* All the leaves, new and old, want to cluster together in this node
+        * in the same slot, so we have to replace this node with a shortcut to
+        * skip over the identical parts of the key and then place a pair of
+        * nodes, one inside the other, at the end of the shortcut and
+        * distribute the keys between them.
+        *
+        * Firstly we need to work out where the leaves start diverging as a
+        * bit position into their keys so that we know how big the shortcut
+        * needs to be.
+        *
+        * We only need to make a single pass of N of the N+1 leaves because if
+        * any keys differ between themselves at bit X then at least one of
+        * them must also differ with the base key at bit X or before.
+        */
+       pr_devel("all leaves cluster together\n");
+       diff = INT_MAX;
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf),
+                                         assoc_array_ptr_to_leaf(node->slots[i]));
+               if (x < diff) {
+                       BUG_ON(x < 0);
+                       diff = x;
+               }
+       }
+       BUG_ON(diff == INT_MAX);
+       BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP);
+
+       keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+       keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+
+       new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
+                        keylen * sizeof(unsigned long), GFP_KERNEL);
+       if (!new_s0)
+               return false;
+       edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0);
+
+       edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
+       new_s0->back_pointer = node->back_pointer;
+       new_s0->parent_slot = node->parent_slot;
+       new_s0->next_node = assoc_array_node_to_ptr(new_n0);
+       new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
+       new_n0->parent_slot = 0;
+       new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
+       new_n1->parent_slot = -1; /* Need to calculate this */
+
+       new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK;
+       pr_devel("skip_to_level = %d [diff %d]\n", level, diff);
+       BUG_ON(level <= 0);
+
+       for (i = 0; i < keylen; i++)
+               new_s0->index_key[i] =
+                       ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE);
+
+       /* Blank off the bits of the last word that lie beyond the shortcut.
+        * If the level falls exactly on a chunk boundary then the shift would
+        * be zero, making the mask all-ones and wiping out the whole word, so
+        * no blanking is done in that case.
+        */
+       if (level & ASSOC_ARRAY_KEY_CHUNK_MASK) {
+               blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
+               pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank);
+               new_s0->index_key[keylen - 1] &= ~blank;
+       }
+
+       /* This now reduces to a node splitting exercise for which we'll need
+        * to regenerate the disparity table.
+        */
+       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+               ptr = node->slots[i];
+               base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr),
+                                                    level);
+               base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+               edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
+       }
+
+       base_seg = ops->get_key_chunk(index_key, level);
+       base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
+       edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK;
+       goto do_split_node;
+}
+
+/*
+ * Handle insertion into the middle of a shortcut; false means out of memory.
+ */
+static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit,
+                                           const struct assoc_array_ops *ops,
+                                           struct assoc_array_walk_result *result)
+{
+       struct assoc_array_shortcut *shortcut, *new_s0, *new_s1;
+       struct assoc_array_node *node, *new_n0, *side;
+       unsigned long sc_segments, dissimilarity, blank;
+       size_t keylen;
+       int level, sc_level, diff;
+       int sc_slot;
+
+       shortcut        = result->wrong_shortcut.shortcut;
+       level           = result->wrong_shortcut.level;
+       sc_level        = result->wrong_shortcut.sc_level;
+       sc_segments     = result->wrong_shortcut.sc_segments;
+       dissimilarity   = result->wrong_shortcut.dissimilarity;
+
+       pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n",
+                __func__, level, dissimilarity, sc_level);
+
+       /* We need to split a shortcut and insert a node between the two
+        * pieces.  Zero-length pieces will be dispensed with entirely.
+        *
+        * First of all, we need to find out in which level the first
+        * difference was.
+        */
+       diff = __ffs(dissimilarity);
+       diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK;
+       diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK;
+       pr_devel("diff=%d\n", diff);
+
+       if (!shortcut->back_pointer) {
+               edit->set[0].ptr = &edit->array->root;
+       } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) {
+               node = assoc_array_ptr_to_node(shortcut->back_pointer);
+               edit->set[0].ptr = &node->slots[shortcut->parent_slot];
+       } else {
+               BUG();
+       }
+
+       edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut);
+
+       /* Create a new node now since we're going to need it anyway */
+       new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+       if (!new_n0)
+               return false;
+       edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+       edit->adjust_count_on = new_n0;
+
+       /* Insert a new shortcut before the new node if this segment isn't of
+        * zero length - otherwise we just connect the new node directly to the
+        * parent.
+        */
+       level += ASSOC_ARRAY_LEVEL_STEP;
+       if (diff > level) {
+               pr_devel("pre-shortcut %d...%d\n", level, diff);
+               keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+               keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+
+               new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
+                                keylen * sizeof(unsigned long), GFP_KERNEL);
+               if (!new_s0)
+                       return false;
+               edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0);
+               edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
+               new_s0->back_pointer = shortcut->back_pointer;
+               new_s0->parent_slot = shortcut->parent_slot;
+               new_s0->next_node = assoc_array_node_to_ptr(new_n0);
+               new_s0->skip_to_level = diff;
+
+               new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
+               new_n0->parent_slot = 0;
+
+               memcpy(new_s0->index_key, shortcut->index_key,
+                      keylen * sizeof(unsigned long));
+
+               blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
+               pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank);
+               new_s0->index_key[keylen - 1] &= ~blank;
+       } else {
+               pr_devel("no pre-shortcut\n");
+               edit->set[0].to = assoc_array_node_to_ptr(new_n0);
+               new_n0->back_pointer = shortcut->back_pointer;
+               new_n0->parent_slot = shortcut->parent_slot;
+       }
+
+       side = assoc_array_ptr_to_node(shortcut->next_node);
+       new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch;
+
+       /* We need to know which slot in the new node is going to take a
+        * metadata pointer.
+        */
+       sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
+       sc_slot &= ASSOC_ARRAY_FAN_MASK;
+
+       pr_devel("new slot %lx >> %d -> %d\n",
+                sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot);
+
+       /* Determine whether we need to follow the new node with a replacement
+        * for the current shortcut.  We could in theory reuse the current
+        * shortcut if its parent slot number doesn't change - but that's a
+        * 1-in-16 chance so not worth expending the code upon.
+        */
+       level = diff + ASSOC_ARRAY_LEVEL_STEP;
+       if (level < shortcut->skip_to_level) {
+               pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level);
+               keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+               keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+
+               new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) +
+                                keylen * sizeof(unsigned long), GFP_KERNEL);
+               if (!new_s1)
+                       return false;
+               edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1);
+
+               new_s1->back_pointer = assoc_array_node_to_ptr(new_n0);
+               new_s1->parent_slot = sc_slot;
+               new_s1->next_node = shortcut->next_node;
+               new_s1->skip_to_level = shortcut->skip_to_level;
+
+               new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1);
+
+               memcpy(new_s1->index_key, shortcut->index_key,
+                      keylen * sizeof(unsigned long));
+
+               edit->set[1].ptr = &side->back_pointer;
+               edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1);
+       } else {
+               pr_devel("no post-shortcut\n");
+
+               /* We don't have to replace the pointed-to node as long as we
+                * use memory barriers to make sure the parent slot number is
+                * changed before the back pointer (the parent slot number is
+                * irrelevant to the old parent shortcut).
+                */
+               new_n0->slots[sc_slot] = shortcut->next_node;
+               edit->set_parent_slot[0].p = &side->parent_slot;
+               edit->set_parent_slot[0].to = sc_slot;
+               edit->set[1].ptr = &side->back_pointer;
+               edit->set[1].to = assoc_array_node_to_ptr(new_n0);
+       }
+
+       /* Install the new leaf in a spare slot in the new node. */
+       if (sc_slot == 0)
+               edit->leaf_p = &new_n0->slots[1];
+       else
+               edit->leaf_p = &new_n0->slots[0];
+
+       pr_devel("<--%s() = ok [split shortcut]\n", __func__);
+       return true;
+}
+
+/**
+ * assoc_array_insert - Script insertion of an object into an associative array
+ * @array: The array to insert into.
+ * @ops: The operations to use.
+ * @index_key: The key to insert at.
+ * @object: The object to insert.
+ *
+ * Precalculate and preallocate a script for the insertion or replacement of an
+ * object in an associative array.  This results in an edit script that can
+ * either be applied or cancelled.
+ *
+ * The function returns a pointer to an edit script or ERR_PTR(-ENOMEM).
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
+                                           const struct assoc_array_ops *ops,
+                                           const void *index_key,
+                                           void *object)
+{
+       struct assoc_array_walk_result result;
+       struct assoc_array_edit *edit;
+
+       pr_devel("-->%s()\n", __func__);
+
+       /* The leaf pointer we're given must not have the bottom bit set as we
+        * use that for type-marking the pointer.  NULL pointers are also not
+        * allowed as they indicate an empty slot but we have to allow them
+        * here as they can be updated later.
+        */
+       BUG_ON(assoc_array_ptr_is_meta(object));
+
+       edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+       if (!edit)
+               return ERR_PTR(-ENOMEM);
+       edit->array = array;
+       edit->ops = ops;
+       edit->leaf = assoc_array_leaf_to_ptr(object);
+       edit->adjust_count_by = 1;
+
+       switch (assoc_array_walk(array, ops, index_key, &result)) {
+       case assoc_array_walk_tree_empty:
+               /* Allocate a root node if there isn't one yet */
+               if (!assoc_array_insert_in_empty_tree(edit))
+                       goto enomem;
+               return edit;
+
+       case assoc_array_walk_found_terminal_node:
+               /* We found a node that doesn't have a node/shortcut pointer in
+                * the slot corresponding to the index key that we have to
+                * follow.
+                */
+               if (!assoc_array_insert_into_terminal_node(edit, ops, index_key,
+                                                          &result))
+                       goto enomem;
+               return edit;
+
+       case assoc_array_walk_found_wrong_shortcut:
+               /* We found a shortcut that didn't match our key in a slot we
+                * needed to follow.
+                */
+               if (!assoc_array_insert_mid_shortcut(edit, ops, &result))
+                       goto enomem;
+               return edit;
+       }
+
+enomem:
+       /* Clean up after an out of memory error */
+       pr_devel("enomem\n");
+       assoc_array_cancel_edit(edit);
+       return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * assoc_array_insert_set_object - Set the new object pointer in an edit script
+ * @edit: The edit script to modify.
+ * @object: The object pointer to set.
+ *
+ * Change the object to be inserted in an edit script.  The object that was
+ * previously set is not freed.  This must be done prior to applying the
+ * script.  @object must not be NULL.
+ */
+void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object)
+{
+       BUG_ON(!object);
+       edit->leaf = assoc_array_leaf_to_ptr(object);
+}
+
+struct assoc_array_delete_collapse_context {
+       struct assoc_array_node *node;          /* Node being filled */
+       const void              *skip_leaf;     /* Leaf to leave out */
+       int                     slot;           /* Next slot to fill */
+};
+
+/*
+ * Subtree collapse iterator: pack every leaf except skip_leaf into the node.
+ */
+static int assoc_array_delete_collapse_iterator(const void *leaf,
+                                               void *iterator_data)
+{
+       struct assoc_array_delete_collapse_context *collapse = iterator_data;
+
+       if (leaf == collapse->skip_leaf)
+               return 0;
+
+       BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT);
+
+       collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf);
+       return 0;
+}
+
+/**
+ * assoc_array_delete - Script deletion of an object from an associative array
+ * @array: The array to search.
+ * @ops: The operations to use.
+ * @index_key: The key to the object.
+ *
+ * Precalculate and preallocate a script for the deletion of an object from an
+ * associative array.  This results in an edit script that can either be
+ * applied or cancelled.
+ *
+ * The function returns a pointer to an edit script if the object was found,
+ * NULL if the object was not found or ERR_PTR(-ENOMEM) if out of memory.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
+                                           const struct assoc_array_ops *ops,
+                                           const void *index_key)
+{
+       struct assoc_array_delete_collapse_context collapse;
+       struct assoc_array_walk_result result;
+       struct assoc_array_node *node, *new_n0;
+       struct assoc_array_edit *edit;
+       struct assoc_array_ptr *ptr;
+       bool has_meta;
+       int slot, i;
+
+       pr_devel("-->%s()\n", __func__);
+
+       edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+       if (!edit)
+               return ERR_PTR(-ENOMEM);
+       edit->array = array;
+       edit->ops = ops;
+       edit->adjust_count_by = -1;
+
+       switch (assoc_array_walk(array, ops, index_key, &result)) {
+       case assoc_array_walk_found_terminal_node:
+               /* We found the node that should hold the leaf we've been asked
+                * to remove; if no slot matches, fall through to not-found.
+                */
+               pr_devel("terminal_node\n");
+               node = result.terminal_node.node;
+
+               for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+                       ptr = node->slots[slot];
+                       if (ptr &&
+                           assoc_array_ptr_is_leaf(ptr) &&
+                           ops->compare_object(assoc_array_ptr_to_leaf(ptr),
+                                               index_key))
+                               goto found_leaf;
+               }
+       case assoc_array_walk_tree_empty:
+       case assoc_array_walk_found_wrong_shortcut:
+       default:
+               assoc_array_cancel_edit(edit);
+               pr_devel("not found\n");
+               return NULL;
+       }
+
+found_leaf:
+       BUG_ON(array->nr_leaves_on_tree <= 0);
+
+       /* In the simplest form of deletion we just clear the slot and release
+        * the leaf after a suitable interval.
+        */
+       edit->dead_leaf = node->slots[slot];
+       edit->set[0].ptr = &node->slots[slot];
+       edit->set[0].to = NULL;
+       edit->adjust_count_on = node;
+
+       /* If that concludes erasure of the last leaf, then delete the entire
+        * internal array.
+        */
+       if (array->nr_leaves_on_tree == 1) {
+               edit->set[1].ptr = &array->root;
+               edit->set[1].to = NULL;
+               edit->adjust_count_on = NULL;
+               edit->excised_subtree = array->root;
+               pr_devel("all gone\n");
+               return edit;
+       }
+
+       /* However, we'd also like to clear up some metadata blocks if we
+        * possibly can.
+        *
+        * We go for a simple algorithm of: if this branch will have FAN_OUT or
+        * fewer leaves after the deletion, then attempt to collapse it - and
+        * attempt to recursively collapse up the tree.
+        *
+        * We could also try and collapse in partially filled subtrees to take
+        * up space in this node.
+        */
+       if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
+               struct assoc_array_node *parent, *grandparent;
+               struct assoc_array_ptr *ptr;
+
+               /* First of all, we need to know if this node has metadata so
+                * that we don't try collapsing if all the leaves are already
+                * here.
+                */
+               has_meta = false;
+               for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+                       ptr = node->slots[i];
+                       if (assoc_array_ptr_is_meta(ptr)) {
+                               has_meta = true;
+                               break;
+                       }
+               }
+
+               pr_devel("leaves: %ld [m=%d]\n",
+                        node->nr_leaves_on_branch - 1, has_meta);
+
+               /* Look further up the tree to see if we can collapse this node
+                * into a more proximal node too.
+                */
+               parent = node;
+       collapse_up:
+               pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch);
+
+               ptr = parent->back_pointer;
+               if (!ptr)
+                       goto do_collapse;
+               if (assoc_array_ptr_is_shortcut(ptr)) {
+                       struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr);
+                       ptr = s->back_pointer;
+                       if (!ptr)
+                               goto do_collapse;
+               }
+
+               grandparent = assoc_array_ptr_to_node(ptr);
+               if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
+                       parent = grandparent;
+                       goto collapse_up;
+               }
+
+       do_collapse:
+               /* There's no point collapsing if the original node has no meta
+                * pointers to discard and if we didn't merge into one of that
+                * node's ancestry.
+                */
+               if (has_meta || parent != node) {
+                       node = parent;
+
+                       /* Create a new node to collapse into */
+                       new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+                       if (!new_n0)
+                               goto enomem;
+                       edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
+
+                       new_n0->back_pointer = node->back_pointer;
+                       new_n0->parent_slot = node->parent_slot;
+                       new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
+                       edit->adjust_count_on = new_n0;
+
+                       collapse.node = new_n0;
+                       collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf);
+                       collapse.slot = 0;
+                       assoc_array_subtree_iterate(assoc_array_node_to_ptr(node),
+                                                   node->back_pointer,
+                                                   assoc_array_delete_collapse_iterator,
+                                                   &collapse);
+                       pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch);
+                       BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1);
+
+                       if (!node->back_pointer) {
+                               edit->set[1].ptr = &array->root;
+                       } else if (assoc_array_ptr_is_leaf(node->back_pointer)) {
+                               BUG();
+                       } else if (assoc_array_ptr_is_node(node->back_pointer)) {
+                               struct assoc_array_node *p =
+                                       assoc_array_ptr_to_node(node->back_pointer);
+                               edit->set[1].ptr = &p->slots[node->parent_slot];
+                       } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) {
+                               struct assoc_array_shortcut *s =
+                                       assoc_array_ptr_to_shortcut(node->back_pointer);
+                               edit->set[1].ptr = &s->next_node;
+                       }
+                       edit->set[1].to = assoc_array_node_to_ptr(new_n0);
+                       edit->excised_subtree = assoc_array_node_to_ptr(node);
+               }
+       }
+
+       return edit;
+
+enomem:
+       /* Clean up after an out of memory error */
+       pr_devel("enomem\n");
+       assoc_array_cancel_edit(edit);
+       return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * assoc_array_clear - Script deletion of all objects from an associative array
+ * @array: The array to clear.
+ * @ops: The operations to use.
+ *
+ * Precalculate and preallocate a script for the deletion of all the objects
+ * from an associative array.  This results in an edit script that can either
+ * be applied or cancelled.
+ *
+ * The function returns a pointer to an edit script if there are objects to be
+ * deleted, NULL if there are no objects in the array or ERR_PTR(-ENOMEM).
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
+                                          const struct assoc_array_ops *ops)
+{
+       struct assoc_array_edit *edit;
+
+       pr_devel("-->%s()\n", __func__);
+
+       if (!array->root)
+               return NULL;
+
+       edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+       if (!edit)
+               return ERR_PTR(-ENOMEM);
+       edit->array = array;
+       edit->ops = ops;
+       edit->set[1].ptr = &array->root;
+       edit->set[1].to = NULL;
+       edit->excised_subtree = array->root;
+       edit->ops_for_excised_subtree = ops;
+       pr_devel("all gone\n");
+       return edit;
+}
+
+/*
+ * Handle the deferred destruction after an applied edit; frees the edit too.
+ */
+static void assoc_array_rcu_cleanup(struct rcu_head *head)
+{
+       struct assoc_array_edit *edit =
+               container_of(head, struct assoc_array_edit, rcu);
+       int i;
+
+       pr_devel("-->%s()\n", __func__);
+
+       if (edit->dead_leaf)
+               edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf));
+       for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++)
+               if (edit->excised_meta[i])
+                       kfree(assoc_array_ptr_to_node(edit->excised_meta[i]));
+
+       if (edit->excised_subtree) {
+               BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree));
+               if (assoc_array_ptr_is_node(edit->excised_subtree)) {
+                       struct assoc_array_node *n =
+                               assoc_array_ptr_to_node(edit->excised_subtree);
+                       n->back_pointer = NULL;
+               } else {
+                       struct assoc_array_shortcut *s =
+                               assoc_array_ptr_to_shortcut(edit->excised_subtree);
+                       s->back_pointer = NULL;
+               }
+               assoc_array_destroy_subtree(edit->excised_subtree,
+                                           edit->ops_for_excised_subtree);
+       }
+
+       kfree(edit);
+}
+
+/**
+ * assoc_array_apply_edit - Apply an edit script to an associative array
+ * @edit: The script to apply.
+ *
+ * Apply an edit script to an associative array to effect an insertion,
+ * deletion or clearance.  As the edit script includes preallocated memory,
+ * this is guaranteed not to fail.
+ *
+ * The edit script, dead objects and dead metadata will be scheduled for
+ * destruction after an RCU grace period to permit those doing read-only
+ * accesses on the array to continue to do so under the RCU read lock whilst
+ * the edit is taking place.
+ */
+void assoc_array_apply_edit(struct assoc_array_edit *edit)
+{
+       struct assoc_array_shortcut *shortcut;
+       struct assoc_array_node *node;
+       struct assoc_array_ptr *ptr;
+       int i;
+
+       pr_devel("-->%s()\n", __func__);
+
+       smp_wmb();
+       if (edit->leaf_p)
+               *edit->leaf_p = edit->leaf;
+
+       smp_wmb();
+       for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++)
+               if (edit->set_parent_slot[i].p)
+                       *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to;
+
+       smp_wmb(); /* parent slots before back pointers */
+       for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++)
+               if (edit->set_backpointers[i])
+                       *edit->set_backpointers[i] = edit->set_backpointers_to;
+
+       smp_wmb();
+       for (i = 0; i < ARRAY_SIZE(edit->set); i++)
+               if (edit->set[i].ptr)
+                       *edit->set[i].ptr = edit->set[i].to;
+
+       if (edit->array->root == NULL) {
+               edit->array->nr_leaves_on_tree = 0;
+       } else if (edit->adjust_count_on) {
+               node = edit->adjust_count_on;
+               for (;;) {
+                       node->nr_leaves_on_branch += edit->adjust_count_by;
+
+                       ptr = node->back_pointer;
+                       if (!ptr)
+                               break;
+                       if (assoc_array_ptr_is_shortcut(ptr)) {
+                               shortcut = assoc_array_ptr_to_shortcut(ptr);
+                               ptr = shortcut->back_pointer;
+                               if (!ptr)
+                                       break;
+                       }
+                       BUG_ON(!assoc_array_ptr_is_node(ptr));
+                       node = assoc_array_ptr_to_node(ptr);
+               }
+
+               edit->array->nr_leaves_on_tree += edit->adjust_count_by;
+       }
+
+       call_rcu(&edit->rcu, assoc_array_rcu_cleanup);
+}
+
+/**
+ * assoc_array_cancel_edit - Discard an edit script.
+ * @edit: The script to discard.
+ *
+ * Free an edit script and all the preallocated data it holds without making
+ * any changes to the associative array it was intended for.
+ *
+ * NOTE!  In the case of an insertion script, this does _not_ release the leaf
+ * that was to be inserted.  That is left to the caller.
+ */
+void assoc_array_cancel_edit(struct assoc_array_edit *edit)
+{
+       struct assoc_array_ptr *ptr;
+       int i;
+
+       pr_devel("-->%s()\n", __func__);
+
+       /* Free any metadata blocks preallocated for the edit */
+       for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) {
+               ptr = edit->new_meta[i];
+               if (ptr) {
+                       if (assoc_array_ptr_is_node(ptr))
+                               kfree(assoc_array_ptr_to_node(ptr));
+                       else
+                               kfree(assoc_array_ptr_to_shortcut(ptr));
+               }
+       }
+       kfree(edit);
+}
+
+/**
+ * assoc_array_gc - Garbage collect an associative array.
+ * @array: The array to clean.
+ * @ops: The operations to use.
+ * @iterator: A callback function to pass judgement on each object.
+ * @iterator_data: Private data for the callback function.
+ *
+ * Collect garbage from an associative array and pack down the internal tree to
+ * save memory.
+ *
+ * The iterator function is asked to pass judgement upon each object in the
+ * array.  If it returns false, the object is discarded and if it returns true,
+ * the object is kept.  If it returns true, it must increment the object's
+ * usage count (or whatever it needs to do to retain it) before returning.
+ *
+ * This function returns 0 if successful or -ENOMEM if out of memory.  In the
+ * latter case, the array is not changed.
+ *
+ * The caller should lock against other modifications and must continue to hold
+ * the lock until assoc_array_apply_edit() has been called.
+ *
+ * Accesses to the tree may take place concurrently with this function,
+ * provided they hold the RCU read lock.
+ */
+int assoc_array_gc(struct assoc_array *array,
+                  const struct assoc_array_ops *ops,
+                  bool (*iterator)(void *object, void *iterator_data),
+                  void *iterator_data)
+{
+       struct assoc_array_shortcut *shortcut, *new_s;
+       struct assoc_array_node *node, *new_n;
+       struct assoc_array_edit *edit;
+       struct assoc_array_ptr *cursor, *ptr;
+       struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp;
+       unsigned long nr_leaves_on_tree;
+       int keylen, slot, nr_free, next_slot, i;
+
+       pr_devel("-->%s()\n", __func__);
+
+       if (!array->root)
+               return 0;
+
+       edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
+       if (!edit)
+               return -ENOMEM;
+       edit->array = array;
+       edit->ops = ops;
+       edit->ops_for_excised_subtree = ops;
+       edit->set[0].ptr = &array->root;
+       edit->excised_subtree = array->root;
+
+       new_root = new_parent = NULL;
+       new_ptr_pp = &new_root;
+       cursor = array->root;
+
+descend:
+       /* If this point is a shortcut, then we need to duplicate it and
+        * advance the target cursor.
+        */
+       if (assoc_array_ptr_is_shortcut(cursor)) {
+               shortcut = assoc_array_ptr_to_shortcut(cursor);
+               keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
+               keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
+               new_s = kmalloc(sizeof(struct assoc_array_shortcut) +
+                               keylen * sizeof(unsigned long), GFP_KERNEL);
+               if (!new_s)
+                       goto enomem;
+               pr_devel("dup shortcut %p -> %p\n", shortcut, new_s);
+               memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) +
+                                        keylen * sizeof(unsigned long)));
+               new_s->back_pointer = new_parent;
+               new_s->parent_slot = shortcut->parent_slot;
+               *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s);
+               new_ptr_pp = &new_s->next_node;
+               cursor = shortcut->next_node;
+       }
+
+       /* Duplicate the node at this position */
+       node = assoc_array_ptr_to_node(cursor);
+       new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
+       if (!new_n)
+               goto enomem;
+       pr_devel("dup node %p -> %p\n", node, new_n);
+       new_n->back_pointer = new_parent;
+       new_n->parent_slot = node->parent_slot;
+       *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n);
+       new_ptr_pp = NULL;
+       slot = 0;
+
+continue_node:
+       /* Filter across any leaves and gc any subtrees */
+       for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               ptr = node->slots[slot];
+               if (!ptr)
+                       continue;
+
+               if (assoc_array_ptr_is_leaf(ptr)) {
+                       if (iterator(assoc_array_ptr_to_leaf(ptr),
+                                    iterator_data))
+                               /* The iterator will have done any reference
+                                * counting on the object for us.
+                                */
+                               new_n->slots[slot] = ptr;
+                       continue;
+               }
+
+               new_ptr_pp = &new_n->slots[slot];
+               cursor = ptr;
+               goto descend;
+       }
+
+       pr_devel("-- compress node %p --\n", new_n);
+
+       /* Count up the number of empty slots in this node and work out the
+        * subtree leaf count.
+        */
+       new_n->nr_leaves_on_branch = 0;
+       nr_free = 0;
+       for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               ptr = new_n->slots[slot];
+               if (!ptr)
+                       nr_free++;
+               else if (assoc_array_ptr_is_leaf(ptr))
+                       new_n->nr_leaves_on_branch++;
+       }
+       pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch);
+
+       /* See what we can fold in */
+       next_slot = 0;
+       for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               struct assoc_array_shortcut *s;
+               struct assoc_array_node *child;
+
+               ptr = new_n->slots[slot];
+               if (!ptr || assoc_array_ptr_is_leaf(ptr))
+                       continue;
+
+               s = NULL;
+               if (assoc_array_ptr_is_shortcut(ptr)) {
+                       s = assoc_array_ptr_to_shortcut(ptr);
+                       ptr = s->next_node;
+               }
+
+               child = assoc_array_ptr_to_node(ptr);
+               new_n->nr_leaves_on_branch += child->nr_leaves_on_branch;
+
+               if (child->nr_leaves_on_branch <= nr_free + 1) {
+                       /* Fold the child node into this one */
+                       pr_devel("[%d] fold node %lu/%d [nx %d]\n",
+                                slot, child->nr_leaves_on_branch, nr_free + 1,
+                                next_slot);
+
+                       /* We would already have reaped an intervening shortcut
+                        * on the way back up the tree.
+                        */
+                       BUG_ON(s);
+
+                       new_n->slots[slot] = NULL;
+                       nr_free++;
+                       if (slot < next_slot)
+                               next_slot = slot;
+                       for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
+                               struct assoc_array_ptr *p = child->slots[i];
+                               if (!p)
+                                       continue;
+                               BUG_ON(assoc_array_ptr_is_meta(p));
+                               while (new_n->slots[next_slot])
+                                       next_slot++;
+                               BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT);
+                               new_n->slots[next_slot++] = p;
+                               nr_free--;
+                       }
+                       kfree(child);
+               } else {
+                       pr_devel("[%d] retain node %lu/%d [nx %d]\n",
+                                slot, child->nr_leaves_on_branch, nr_free + 1,
+                                next_slot);
+               }
+       }
+
+       pr_devel("after: %lu\n", new_n->nr_leaves_on_branch);
+
+       nr_leaves_on_tree = new_n->nr_leaves_on_branch;
+
+       /* Excise this node if it is singly occupied by a shortcut */
+       if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) {
+               for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++)
+                       if ((ptr = new_n->slots[slot]))
+                               break;
+
+               if (assoc_array_ptr_is_meta(ptr) &&
+                   assoc_array_ptr_is_shortcut(ptr)) {
+                       pr_devel("excise node %p with 1 shortcut\n", new_n);
+                       new_s = assoc_array_ptr_to_shortcut(ptr);
+                       new_parent = new_n->back_pointer;
+                       slot = new_n->parent_slot;
+                       kfree(new_n);
+                       if (!new_parent) {
+                               new_s->back_pointer = NULL;
+                               new_s->parent_slot = 0;
+                               new_root = ptr;
+                               goto gc_complete;
+                       }
+
+                       if (assoc_array_ptr_is_shortcut(new_parent)) {
+                               /* We can discard any preceding shortcut also */
+                               struct assoc_array_shortcut *s =
+                                       assoc_array_ptr_to_shortcut(new_parent);
+
+                               pr_devel("excise preceding shortcut\n");
+
+                               new_parent = new_s->back_pointer = s->back_pointer;
+                               slot = new_s->parent_slot = s->parent_slot;
+                               kfree(s);
+                               if (!new_parent) {
+                                       new_s->back_pointer = NULL;
+                                       new_s->parent_slot = 0;
+                                       new_root = ptr;
+                                       goto gc_complete;
+                               }
+                       }
+
+                       new_s->back_pointer = new_parent;
+                       new_s->parent_slot = slot;
+                       new_n = assoc_array_ptr_to_node(new_parent);
+                       new_n->slots[slot] = ptr;
+                       goto ascend_old_tree;
+               }
+       }
+
+       /* Excise any shortcuts we might encounter that point to nodes that
+        * only contain leaves.
+        */
+       ptr = new_n->back_pointer;
+       if (!ptr)
+               goto gc_complete;
+
+       if (assoc_array_ptr_is_shortcut(ptr)) {
+               new_s = assoc_array_ptr_to_shortcut(ptr);
+               new_parent = new_s->back_pointer;
+               slot = new_s->parent_slot;
+
+               if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) {
+                       struct assoc_array_node *n;
+
+                       pr_devel("excise shortcut\n");
+                       new_n->back_pointer = new_parent;
+                       new_n->parent_slot = slot;
+                       kfree(new_s);
+                       if (!new_parent) {
+                               new_root = assoc_array_node_to_ptr(new_n);
+                               goto gc_complete;
+                       }
+
+                       n = assoc_array_ptr_to_node(new_parent);
+                       n->slots[slot] = assoc_array_node_to_ptr(new_n);
+               }
+       } else {
+               new_parent = ptr;
+       }
+       new_n = assoc_array_ptr_to_node(new_parent);
+
+ascend_old_tree:
+       ptr = node->back_pointer;
+       if (assoc_array_ptr_is_shortcut(ptr)) {
+               shortcut = assoc_array_ptr_to_shortcut(ptr);
+               slot = shortcut->parent_slot;
+               cursor = shortcut->back_pointer;
+       } else {
+               slot = node->parent_slot;
+               cursor = ptr;
+       }
+       BUG_ON(!ptr);
+       node = assoc_array_ptr_to_node(cursor);
+       slot++;
+       goto continue_node;
+
+gc_complete:
+       edit->set[0].to = new_root;
+       assoc_array_apply_edit(edit);
+       edit->array->nr_leaves_on_tree = nr_leaves_on_tree;
+       return 0;
+
+enomem:
+       pr_devel("enomem\n");
+       assoc_array_destroy_subtree(new_root, edit->ops);
+       kfree(edit);
+       return -ENOMEM;
+}
index 657979f71bef0a0b3331eb804c0921d1f7afe6a8..bf076d281d4045da0b4b780ecd19a19db3987485 100644 (file)
@@ -121,3 +121,6 @@ void mpi_free(MPI a)
        kfree(a);
 }
 EXPORT_SYMBOL_GPL(mpi_free);
+
+MODULE_DESCRIPTION("Multiprecision maths library");
+MODULE_LICENSE("GPL");
index db0e5cd34c70866e097e0af91d745d1d96d579fe..91c4117637ae1fdf33d385ea5c3f9eba601e03a8 100644 (file)
@@ -1353,6 +1353,8 @@ static void render_out_of_line_list(FILE *out)
                        render_opcode(out, "ASN1_OP_END_SET_OF%s,\n", act);
                        render_opcode(out, "_jump_target(%u),\n", entry);
                        break;
+               default:
+                       break;
                }
                if (e->action)
                        render_opcode(out, "_action(ACT_%s),\n",
index c26c81e925712fbc2ba38264f477bdd0a02f548d..a5918e01a4f71a6e97abc664682bd23f75bc544f 100644 (file)
@@ -16,7 +16,6 @@ obj-$(CONFIG_MMU)                     += min_addr.o
 # Object file lists
 obj-$(CONFIG_SECURITY)                 += security.o capability.o
 obj-$(CONFIG_SECURITYFS)               += inode.o
-# Must precede capability.o in order to stack properly.
 obj-$(CONFIG_SECURITY_SELINUX)         += selinux/built-in.o
 obj-$(CONFIG_SECURITY_SMACK)           += smack/built-in.o
 obj-$(CONFIG_AUDIT)                    += lsm_audit.o
index dbeb9bc27b24a14b7f546a44843bba2757db77cb..8b4f24ae43381de05af67271edd9a8ddd57c651f 100644 (file)
@@ -777,9 +777,15 @@ static int cap_xfrm_policy_delete_security(struct xfrm_sec_ctx *ctx)
        return 0;
 }
 
-static int cap_xfrm_state_alloc_security(struct xfrm_state *x,
-                                        struct xfrm_user_sec_ctx *sec_ctx,
-                                        u32 secid)
+/* Default xfrm_state_alloc hook: does nothing and reports success (0). */
+static int cap_xfrm_state_alloc(struct xfrm_state *x,
+                               struct xfrm_user_sec_ctx *sec_ctx)
+{
+       return 0;
+}
+
+/* Default xfrm_state_alloc_acquire hook: does nothing and reports success (0). */
+static int cap_xfrm_state_alloc_acquire(struct xfrm_state *x,
+                                       struct xfrm_sec_ctx *polsec,
+                                       u32 secid)
 {
        return 0;
 }
@@ -1101,7 +1107,8 @@ void __init security_fixup_ops(struct security_operations *ops)
        set_to_cap_if_null(ops, xfrm_policy_clone_security);
        set_to_cap_if_null(ops, xfrm_policy_free_security);
        set_to_cap_if_null(ops, xfrm_policy_delete_security);
-       set_to_cap_if_null(ops, xfrm_state_alloc_security);
+       set_to_cap_if_null(ops, xfrm_state_alloc);
+       set_to_cap_if_null(ops, xfrm_state_alloc_acquire);
        set_to_cap_if_null(ops, xfrm_state_free_security);
        set_to_cap_if_null(ops, xfrm_state_delete_security);
        set_to_cap_if_null(ops, xfrm_policy_lookup);
index a90d6d300dbd3b0b5849cae74af644e576e6dc45..53d8748c95642e7adaea1f5aece6660212ad8556 100644 (file)
@@ -4,6 +4,7 @@
 
 config KEYS
        bool "Enable access key retention support"
+       select ASSOCIATIVE_ARRAY
        help
          This option provides support for retaining authentication tokens and
          access keys in the kernel.
@@ -19,6 +20,34 @@ config KEYS
 
          If you are unsure as to whether this is required, answer N.
 
+config PERSISTENT_KEYRINGS
+       bool "Enable register of persistent per-UID keyrings"
+       depends on KEYS
+       help
+         This option provides a register of persistent per-UID keyrings,
+         primarily aimed at Kerberos key storage.  The keyrings are persistent
+         in the sense that they stay around after all processes of that UID
+         have exited, not that they survive the machine being rebooted.
+
+         A particular keyring may be accessed by either the user whose keyring
+         it is or by a process with administrative privileges.  The active
+         LSMs get to rule on which admin-level processes get to access the
+         cache.
+
+         Keyrings are created and added into the register upon demand and get
+         removed if they expire (a default timeout is set upon creation).
+
+config BIG_KEYS
+       tristate "Large payload keys"
+       depends on KEYS
+       depends on TMPFS
+       help
+         This option provides support for holding large keys within the kernel
+         (for example Kerberos ticket caches).  The data may be stored out to
+         swapspace by tmpfs.
+
+         If you are unsure as to whether this is required, answer N.
+
 config TRUSTED_KEYS
        tristate "TRUSTED KEYS"
        depends on KEYS && TCG_TPM
index 504aaa008388c1595716e40f0e62e5fcd44fba71..dfb3a7bededf548ac1eed24b094de858e7a07df6 100644 (file)
@@ -18,9 +18,11 @@ obj-y := \
 obj-$(CONFIG_KEYS_COMPAT) += compat.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_SYSCTL) += sysctl.o
+obj-$(CONFIG_PERSISTENT_KEYRINGS) += persistent.o
 
 #
 # Key types
 #
+obj-$(CONFIG_BIG_KEYS) += big_key.o
 obj-$(CONFIG_TRUSTED_KEYS) += trusted.o
 obj-$(CONFIG_ENCRYPTED_KEYS) += encrypted-keys/
diff --git a/security/keys/big_key.c b/security/keys/big_key.c
new file mode 100644 (file)
index 0000000..5f9defc
--- /dev/null
@@ -0,0 +1,204 @@
+/* Large capacity key type
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/file.h>
+#include <linux/shmem_fs.h>
+#include <linux/err.h>
+#include <keys/user-type.h>
+#include <keys/big_key-type.h>
+
+MODULE_LICENSE("GPL");
+
+/*
+ * If the data is under this limit, there's no point creating a shm file to
+ * hold it as the permanently resident metadata for the shmem fs will be at
+ * least as large as the data.
+ */
+#define BIG_KEY_FILE_THRESHOLD (sizeof(struct inode) + sizeof(struct dentry))
+
+/*
+ * big_key defined keys take an arbitrary string as the description and an
+ * arbitrary blob of data as the payload
+ *
+ * The instantiate, revoke, destroy, describe and read operations are the
+ * big_key_*() functions defined later in this file.
+ * NOTE(review): user_match is not defined here; presumably it is the matcher
+ * declared by <keys/user-type.h> - confirm.
+ */
+struct key_type key_type_big_key = {
+       .name                   = "big_key",
+       .def_lookup_type        = KEYRING_SEARCH_LOOKUP_DIRECT,
+       .instantiate            = big_key_instantiate,
+       .match                  = user_match,
+       .revoke                 = big_key_revoke,
+       .destroy                = big_key_destroy,
+       .describe               = big_key_describe,
+       .read                   = big_key_read,
+};
+
+/*
+ * Instantiate a big key
+ *
+ * @key:  the key being instantiated
+ * @prep: preparsed payload; prep->data / prep->datalen supply the blob
+ *
+ * Payloads larger than BIG_KEY_FILE_THRESHOLD are written to a shmem file
+ * whose path is pinned in key->payload.data2; smaller ones are copied into a
+ * kmalloc'd buffer hung off key->payload.data.  The payload length is recorded
+ * in key->type_data.x[1] so that the other operations can tell the two cases
+ * apart.
+ *
+ * Returns 0 on success or a negative error code: -EINVAL for a missing, empty
+ * or over-1MiB payload, -ENOMEM on allocation failure or a short write, or
+ * the error reported by key_payload_reserve(), shmem_file_setup() or
+ * kernel_write().
+ */
+int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
+{
+       struct path *path = (struct path *)&key->payload.data2;
+       struct file *file;
+       ssize_t written;
+       size_t datalen = prep->datalen;
+       int ret;
+
+       ret = -EINVAL;
+       if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data)
+               goto error;
+
+       /* Set an arbitrary quota */
+       ret = key_payload_reserve(key, 16);
+       if (ret < 0)
+               goto error;
+
+       key->type_data.x[1] = datalen;
+
+       if (datalen > BIG_KEY_FILE_THRESHOLD) {
+               /* Create a shmem file to store the data in.  This will permit the data
+                * to be swapped out if needed.
+                *
+                * TODO: Encrypt the stored data with a temporary key.
+                */
+               file = shmem_file_setup("", datalen, 0);
+               if (IS_ERR(file)) {
+                       /* ret is still 0 from the quota reservation, so the
+                        * failure must be propagated explicitly.
+                        */
+                       ret = PTR_ERR(file);
+                       goto err_quota;
+               }
+
+               written = kernel_write(file, prep->data, prep->datalen, 0);
+               if (written != datalen) {
+                       /* Pass a write error through; treat a short write as
+                        * running out of memory.
+                        */
+                       ret = written;
+                       if (written >= 0)
+                               ret = -ENOMEM;
+                       goto err_fput;
+               }
+
+               /* Pin the mount and dentry to the key so that we can open it again
+                * later
+                */
+               *path = file->f_path;
+               path_get(path);
+               fput(file);
+       } else {
+               /* Just store the data in a buffer */
+               void *data = kmalloc(datalen, GFP_KERNEL);
+               if (!data) {
+                       ret = -ENOMEM;
+                       goto err_quota;
+               }
+
+               key->payload.data = memcpy(data, prep->data, prep->datalen);
+       }
+       return 0;
+
+err_fput:
+       fput(file);
+err_quota:
+       key_payload_reserve(key, 0);
+error:
+       return ret;
+}
+
+/*
+ * Revoke a big key: drop its quota reservation and, if the payload is held in
+ * a file rather than a buffer, truncate that file to zero length.
+ * - called with the key sem write-locked
+ */
+void big_key_revoke(struct key *key)
+{
+       /* The quota is released whichever way the payload is stored */
+       key_payload_reserve(key, 0);
+
+       /* Nothing more to do for a key that never got a payload ... */
+       if (!key_is_instantiated(key))
+               return;
+
+       /* ... or for one small enough to be held in a plain buffer */
+       if (key->type_data.x[1] <= BIG_KEY_FILE_THRESHOLD)
+               return;
+
+       vfs_truncate((struct path *)&key->payload.data2, 0);
+}
+
+/*
+ * Release whatever storage backs a big_key key that is being destroyed:
+ * either the kmalloc'd buffer or the pinned path of the backing file,
+ * depending on the recorded payload length.
+ */
+void big_key_destroy(struct key *key)
+{
+       struct path *backing;
+
+       if (key->type_data.x[1] <= BIG_KEY_FILE_THRESHOLD) {
+               /* Small payload: free the in-memory copy */
+               kfree(key->payload.data);
+               key->payload.data = NULL;
+               return;
+       }
+
+       /* Large payload: drop the path reference and clear the stale pointers */
+       backing = (struct path *)&key->payload.data2;
+       path_put(backing);
+       backing->mnt = NULL;
+       backing->dentry = NULL;
+}
+
+/*
+ * Describe a big_key key: emit the description and, once the key has been
+ * instantiated, the payload length and whether it is held in a file or a
+ * buffer.
+ */
+void big_key_describe(const struct key *key, struct seq_file *m)
+{
+       unsigned long size;
+       const char *storage;
+
+       seq_puts(m, key->description);
+
+       if (!key_is_instantiated(key))
+               return;
+
+       size = key->type_data.x[1];
+       if (size > BIG_KEY_FILE_THRESHOLD)
+               storage = "file";
+       else
+               storage = "buff";
+
+       seq_printf(m, ": %lu [%s]", size, storage);
+}
+
+/*
+ * Copy the key's payload out to userspace
+ * - the key's semaphore is read-locked
+ *
+ * If no buffer is given, or it is smaller than the payload, only the payload
+ * length is returned.  Otherwise returns the number of bytes copied, -EFAULT
+ * if the buffer copy faults, -EIO on a short file read, or the error from
+ * dentry_open() / vfs_read().
+ */
+long big_key_read(const struct key *key, char __user *buffer, size_t buflen)
+{
+       unsigned long size = key->type_data.x[1];
+       struct file *filp;
+       loff_t offset = 0;
+       long nread;
+
+       /* Just report the required size if we can't (or needn't) copy */
+       if (!buffer || buflen < size)
+               return size;
+
+       /* Small payloads are held in a kernel buffer */
+       if (size <= BIG_KEY_FILE_THRESHOLD) {
+               if (copy_to_user(buffer, key->payload.data, size) != 0)
+                       return -EFAULT;
+               return size;
+       }
+
+       /* Large payloads are held in a file; reopen it via the pinned path */
+       filp = dentry_open((struct path *)&key->payload.data2, O_RDONLY,
+                          current_cred());
+       if (IS_ERR(filp))
+               return PTR_ERR(filp);
+
+       nread = vfs_read(filp, buffer, size, &offset);
+       fput(filp);
+
+       /* A successful but short read is reported as an I/O error */
+       if (nread >= 0 && nread != size)
+               return -EIO;
+
+       return nread;
+}
+
+/*
+ * Module initialisation: register the big_key key type and return the result
+ * of register_key_type().
+ */
+static int __init big_key_init(void)
+{
+       return register_key_type(&key_type_big_key);
+}
+
+/*
+ * Module removal: unregister the big_key key type.
+ */
+static void __exit big_key_cleanup(void)
+{
+       unregister_key_type(&key_type_big_key);
+}
+
+module_init(big_key_init);
+module_exit(big_key_cleanup);
index d65fa7fa29ba1a53b1ef4fb6d76c7aeafb7da65a..bbd32c729dbb4e019d1461116b84c25107e35ab8 100644 (file)
@@ -138,6 +138,9 @@ asmlinkage long compat_sys_keyctl(u32 option,
        case KEYCTL_INVALIDATE:
                return keyctl_invalidate_key(arg2);
 
+       case KEYCTL_GET_PERSISTENT:
+               return keyctl_get_persistent(arg2, arg3);
+
        default:
                return -EOPNOTSUPP;
        }
index d67c97bb10256d5dc5a9b74b3b8aaa37022f96b1..cce621c33dce559b346e44d247f70076f4488a85 100644 (file)
@@ -130,6 +130,13 @@ void key_gc_keytype(struct key_type *ktype)
        kleave("");
 }
 
+/*
+ * Iterator callback handed to assoc_array_iterate() by key_gc_keyring().
+ *
+ * @object is treated as a struct key and @iterator_data as a pointer to the
+ * time_t gc limit.  Returns the result of key_is_dead() for that key and
+ * limit; key_gc_keyring() takes a true result from the iteration to mean the
+ * keyring needs garbage collecting.
+ */
+static int key_gc_keyring_func(const void *object, void *iterator_data)
+{
+       const struct key *key = object;
+       time_t *limit = iterator_data;
+       return key_is_dead(key, *limit);
+}
+
 /*
  * Garbage collect pointers from a keyring.
  *
@@ -138,10 +145,9 @@ void key_gc_keytype(struct key_type *ktype)
  */
 static void key_gc_keyring(struct key *keyring, time_t limit)
 {
-       struct keyring_list *klist;
-       int loop;
+       int result;
 
-       kenter("%x", key_serial(keyring));
+       kenter("%x{%s}", keyring->serial, keyring->description ?: "");
 
        if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
                              (1 << KEY_FLAG_REVOKED)))
@@ -149,27 +155,17 @@ static void key_gc_keyring(struct key *keyring, time_t limit)
 
        /* scan the keyring looking for dead keys */
        rcu_read_lock();
-       klist = rcu_dereference(keyring->payload.subscriptions);
-       if (!klist)
-               goto unlock_dont_gc;
-
-       loop = klist->nkeys;
-       smp_rmb();
-       for (loop--; loop >= 0; loop--) {
-               struct key *key = rcu_dereference(klist->keys[loop]);
-               if (key_is_dead(key, limit))
-                       goto do_gc;
-       }
-
-unlock_dont_gc:
+       result = assoc_array_iterate(&keyring->keys,
+                                    key_gc_keyring_func, &limit);
        rcu_read_unlock();
+       if (result == true)
+               goto do_gc;
+
 dont_gc:
        kleave(" [no gc]");
        return;
 
 do_gc:
-       rcu_read_unlock();
-
        keyring_gc(keyring, limit);
        kleave(" [gc]");
 }
@@ -392,7 +388,6 @@ found_unreferenced_key:
         */
 found_keyring:
        spin_unlock(&key_serial_lock);
-       kdebug("scan keyring %d", key->serial);
        key_gc_keyring(key, limit);
        goto maybe_resched;
 
index d4f1468b9b50f46cd7d739544902a77a3ff40384..80b2aac4f50ceda614d03c815f7638aa88a0c933 100644 (file)
@@ -89,42 +89,53 @@ extern struct key_type *key_type_lookup(const char *type);
 extern void key_type_put(struct key_type *ktype);
 
 extern int __key_link_begin(struct key *keyring,
-                           const struct key_type *type,
-                           const char *description,
-                           unsigned long *_prealloc);
+                           const struct keyring_index_key *index_key,
+                           struct assoc_array_edit **_edit);
 extern int __key_link_check_live_key(struct key *keyring, struct key *key);
-extern void __key_link(struct key *keyring, struct key *key,
-                      unsigned long *_prealloc);
+extern void __key_link(struct key *key, struct assoc_array_edit **_edit);
 extern void __key_link_end(struct key *keyring,
-                          struct key_type *type,
-                          unsigned long prealloc);
+                          const struct keyring_index_key *index_key,
+                          struct assoc_array_edit *edit);
 
-extern key_ref_t __keyring_search_one(key_ref_t keyring_ref,
-                                     const struct key_type *type,
-                                     const char *description,
-                                     key_perm_t perm);
+extern key_ref_t find_key_to_update(key_ref_t keyring_ref,
+                                   const struct keyring_index_key *index_key);
 
 extern struct key *keyring_search_instkey(struct key *keyring,
                                          key_serial_t target_id);
 
+extern int iterate_over_keyring(const struct key *keyring,
+                               int (*func)(const struct key *key, void *data),
+                               void *data);
+
 typedef int (*key_match_func_t)(const struct key *, const void *);
 
+/*
+ * Parameters and working state for a keyring search.
+ *
+ * A pointer to one of these is passed to keyring_search_aux(),
+ * search_my_process_keyrings() and search_process_keyrings() in place of the
+ * separate type/description/match/credential arguments those functions used
+ * to take.  The KEYRING_SEARCH_* values defined below are the bits that may be
+ * set in ->flags.
+ */
+struct keyring_search_context {
+       struct keyring_index_key index_key;
+       const struct cred       *cred;
+       key_match_func_t        match;
+       const void              *match_data;
+       unsigned                flags;
+#define KEYRING_SEARCH_LOOKUP_TYPE     0x0001  /* [as type->def_lookup_type] */
+#define KEYRING_SEARCH_NO_STATE_CHECK  0x0002  /* Skip state checks */
+#define KEYRING_SEARCH_DO_STATE_CHECK  0x0004  /* Override NO_STATE_CHECK */
+#define KEYRING_SEARCH_NO_UPDATE_TIME  0x0008  /* Don't update times */
+#define KEYRING_SEARCH_NO_CHECK_PERM   0x0010  /* Don't check permissions */
+#define KEYRING_SEARCH_DETECT_TOO_DEEP 0x0020  /* Give an error on excessive depth */
+
+       int (*iterator)(const void *object, void *iterator_data);
+
+       /* Internal stuff */
+       int                     skipped_ret;
+       bool                    possessed;
+       key_ref_t               result;
+       struct timespec         now;
+};
+
 extern key_ref_t keyring_search_aux(key_ref_t keyring_ref,
-                                   const struct cred *cred,
-                                   struct key_type *type,
-                                   const void *description,
-                                   key_match_func_t match,
-                                   bool no_state_check);
-
-extern key_ref_t search_my_process_keyrings(struct key_type *type,
-                                           const void *description,
-                                           key_match_func_t match,
-                                           bool no_state_check,
-                                           const struct cred *cred);
-extern key_ref_t search_process_keyrings(struct key_type *type,
-                                        const void *description,
-                                        key_match_func_t match,
-                                        const struct cred *cred);
+                                   struct keyring_search_context *ctx);
+
+extern key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx);
+extern key_ref_t search_process_keyrings(struct keyring_search_context *ctx);
 
 extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check);
 
@@ -202,7 +213,7 @@ extern struct key *key_get_instantiation_authkey(key_serial_t target_id);
 /*
  * Determine whether a key is dead.
  */
-static inline bool key_is_dead(struct key *key, time_t limit)
+static inline bool key_is_dead(const struct key *key, time_t limit)
 {
        return
                key->flags & ((1 << KEY_FLAG_DEAD) |
@@ -244,6 +255,15 @@ extern long keyctl_invalidate_key(key_serial_t);
 extern long keyctl_instantiate_key_common(key_serial_t,
                                          const struct iovec *,
                                          unsigned, size_t, key_serial_t);
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+extern long keyctl_get_persistent(uid_t, key_serial_t);
+extern unsigned persistent_keyring_expiry;
+#else
+/* Stub used when CONFIG_PERSISTENT_KEYRINGS is not set: always -EOPNOTSUPP. */
+static inline long keyctl_get_persistent(uid_t uid, key_serial_t destring)
+{
+       return -EOPNOTSUPP;
+}
+#endif
 
 /*
  * Debugging key validation
index 8fb7c7bd465769cb5dca49e6d6f1ad011c75de63..d331ea9ef380562d177c6c037d1493680ed48782 100644 (file)
@@ -242,8 +242,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
                }
        }
 
-       desclen = strlen(desc) + 1;
-       quotalen = desclen + type->def_datalen;
+       desclen = strlen(desc);
+       quotalen = desclen + 1 + type->def_datalen;
 
        /* get hold of the key tracking for this user */
        user = key_user_lookup(uid);
@@ -277,7 +277,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
                goto no_memory_2;
 
        if (desc) {
-               key->description = kmemdup(desc, desclen, GFP_KERNEL);
+               key->index_key.desc_len = desclen;
+               key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL);
                if (!key->description)
                        goto no_memory_3;
        }
@@ -285,7 +286,7 @@ struct key *key_alloc(struct key_type *type, const char *desc,
        atomic_set(&key->usage, 1);
        init_rwsem(&key->sem);
        lockdep_set_class(&key->sem, &type->lock_class);
-       key->type = type;
+       key->index_key.type = type;
        key->user = user;
        key->quotalen = quotalen;
        key->datalen = type->def_datalen;
@@ -299,6 +300,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
 
        if (!(flags & KEY_ALLOC_NOT_IN_QUOTA))
                key->flags |= 1 << KEY_FLAG_IN_QUOTA;
+       if (flags & KEY_ALLOC_TRUSTED)
+               key->flags |= 1 << KEY_FLAG_TRUSTED;
 
        memset(&key->type_data, 0, sizeof(key->type_data));
 
@@ -408,7 +411,7 @@ static int __key_instantiate_and_link(struct key *key,
                                      struct key_preparsed_payload *prep,
                                      struct key *keyring,
                                      struct key *authkey,
-                                     unsigned long *_prealloc)
+                                     struct assoc_array_edit **_edit)
 {
        int ret, awaken;
 
@@ -435,7 +438,7 @@ static int __key_instantiate_and_link(struct key *key,
 
                        /* and link it into the destination keyring */
                        if (keyring)
-                               __key_link(keyring, key, _prealloc);
+                               __key_link(key, _edit);
 
                        /* disable the authorisation key */
                        if (authkey)
@@ -475,7 +478,7 @@ int key_instantiate_and_link(struct key *key,
                             struct key *authkey)
 {
        struct key_preparsed_payload prep;
-       unsigned long prealloc;
+       struct assoc_array_edit *edit;
        int ret;
 
        memset(&prep, 0, sizeof(prep));
@@ -489,17 +492,15 @@ int key_instantiate_and_link(struct key *key,
        }
 
        if (keyring) {
-               ret = __key_link_begin(keyring, key->type, key->description,
-                                      &prealloc);
+               ret = __key_link_begin(keyring, &key->index_key, &edit);
                if (ret < 0)
                        goto error_free_preparse;
        }
 
-       ret = __key_instantiate_and_link(key, &prep, keyring, authkey,
-                                        &prealloc);
+       ret = __key_instantiate_and_link(key, &prep, keyring, authkey, &edit);
 
        if (keyring)
-               __key_link_end(keyring, key->type, prealloc);
+               __key_link_end(keyring, &key->index_key, edit);
 
 error_free_preparse:
        if (key->type->preparse)
@@ -537,7 +538,7 @@ int key_reject_and_link(struct key *key,
                        struct key *keyring,
                        struct key *authkey)
 {
-       unsigned long prealloc;
+       struct assoc_array_edit *edit;
        struct timespec now;
        int ret, awaken, link_ret = 0;
 
@@ -548,8 +549,7 @@ int key_reject_and_link(struct key *key,
        ret = -EBUSY;
 
        if (keyring)
-               link_ret = __key_link_begin(keyring, key->type,
-                                           key->description, &prealloc);
+               link_ret = __key_link_begin(keyring, &key->index_key, &edit);
 
        mutex_lock(&key_construction_mutex);
 
@@ -571,7 +571,7 @@ int key_reject_and_link(struct key *key,
 
                /* and link it into the destination keyring */
                if (keyring && link_ret == 0)
-                       __key_link(keyring, key, &prealloc);
+                       __key_link(key, &edit);
 
                /* disable the authorisation key */
                if (authkey)
@@ -581,7 +581,7 @@ int key_reject_and_link(struct key *key,
        mutex_unlock(&key_construction_mutex);
 
        if (keyring)
-               __key_link_end(keyring, key->type, prealloc);
+               __key_link_end(keyring, &key->index_key, edit);
 
        /* wake up anyone waiting for a key to be constructed */
        if (awaken)
@@ -645,7 +645,7 @@ found:
        /* this races with key_put(), but that doesn't matter since key_put()
         * doesn't actually change the key
         */
-       atomic_inc(&key->usage);
+       __key_get(key);
 
 error:
        spin_unlock(&key_serial_lock);
@@ -780,25 +780,27 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
                               key_perm_t perm,
                               unsigned long flags)
 {
-       unsigned long prealloc;
+       struct keyring_index_key index_key = {
+               .description    = description,
+       };
        struct key_preparsed_payload prep;
+       struct assoc_array_edit *edit;
        const struct cred *cred = current_cred();
-       struct key_type *ktype;
        struct key *keyring, *key = NULL;
        key_ref_t key_ref;
        int ret;
 
        /* look up the key type to see if it's one of the registered kernel
         * types */
-       ktype = key_type_lookup(type);
-       if (IS_ERR(ktype)) {
+       index_key.type = key_type_lookup(type);
+       if (IS_ERR(index_key.type)) {
                key_ref = ERR_PTR(-ENODEV);
                goto error;
        }
 
        key_ref = ERR_PTR(-EINVAL);
-       if (!ktype->match || !ktype->instantiate ||
-           (!description && !ktype->preparse))
+       if (!index_key.type->match || !index_key.type->instantiate ||
+           (!index_key.description && !index_key.type->preparse))
                goto error_put_type;
 
        keyring = key_ref_to_ptr(keyring_ref);
@@ -812,21 +814,28 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
        memset(&prep, 0, sizeof(prep));
        prep.data = payload;
        prep.datalen = plen;
-       prep.quotalen = ktype->def_datalen;
-       if (ktype->preparse) {
-               ret = ktype->preparse(&prep);
+       prep.quotalen = index_key.type->def_datalen;
+       prep.trusted = flags & KEY_ALLOC_TRUSTED;
+       if (index_key.type->preparse) {
+               ret = index_key.type->preparse(&prep);
                if (ret < 0) {
                        key_ref = ERR_PTR(ret);
                        goto error_put_type;
                }
-               if (!description)
-                       description = prep.description;
+               if (!index_key.description)
+                       index_key.description = prep.description;
                key_ref = ERR_PTR(-EINVAL);
-               if (!description)
+               if (!index_key.description)
                        goto error_free_prep;
        }
+       index_key.desc_len = strlen(index_key.description);
+
+       key_ref = ERR_PTR(-EPERM);
+       if (!prep.trusted && test_bit(KEY_FLAG_TRUSTED_ONLY, &keyring->flags))
+               goto error_free_prep;
+       flags |= prep.trusted ? KEY_ALLOC_TRUSTED : 0;
 
-       ret = __key_link_begin(keyring, ktype, description, &prealloc);
+       ret = __key_link_begin(keyring, &index_key, &edit);
        if (ret < 0) {
                key_ref = ERR_PTR(ret);
                goto error_free_prep;
@@ -844,10 +853,9 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
         * key of the same type and description in the destination keyring and
         * update that instead if possible
         */
-       if (ktype->update) {
-               key_ref = __keyring_search_one(keyring_ref, ktype, description,
-                                              0);
-               if (!IS_ERR(key_ref))
+       if (index_key.type->update) {
+               key_ref = find_key_to_update(keyring_ref, &index_key);
+               if (key_ref)
                        goto found_matching_key;
        }
 
@@ -856,23 +864,24 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
                perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
                perm |= KEY_USR_VIEW;
 
-               if (ktype->read)
+               if (index_key.type->read)
                        perm |= KEY_POS_READ;
 
-               if (ktype == &key_type_keyring || ktype->update)
+               if (index_key.type == &key_type_keyring ||
+                   index_key.type->update)
                        perm |= KEY_POS_WRITE;
        }
 
        /* allocate a new key */
-       key = key_alloc(ktype, description, cred->fsuid, cred->fsgid, cred,
-                       perm, flags);
+       key = key_alloc(index_key.type, index_key.description,
+                       cred->fsuid, cred->fsgid, cred, perm, flags);
        if (IS_ERR(key)) {
                key_ref = ERR_CAST(key);
                goto error_link_end;
        }
 
        /* instantiate it and link it into the target keyring */
-       ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &prealloc);
+       ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &edit);
        if (ret < 0) {
                key_put(key);
                key_ref = ERR_PTR(ret);
@@ -882,12 +891,12 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
        key_ref = make_key_ref(key, is_key_possessed(keyring_ref));
 
 error_link_end:
-       __key_link_end(keyring, ktype, prealloc);
+       __key_link_end(keyring, &index_key, edit);
 error_free_prep:
-       if (ktype->preparse)
-               ktype->free_preparse(&prep);
+       if (index_key.type->preparse)
+               index_key.type->free_preparse(&prep);
 error_put_type:
-       key_type_put(ktype);
+       key_type_put(index_key.type);
 error:
        return key_ref;
 
@@ -895,7 +904,7 @@ error:
        /* we found a matching key, so we're going to try to update it
         * - we can drop the locks first as we have the key pinned
         */
-       __key_link_end(keyring, ktype, prealloc);
+       __key_link_end(keyring, &index_key, edit);
 
        key_ref = __key_update(key_ref, &prep);
        goto error_free_prep;
index 33cfd27b4de29650ae6ad0e1eb45646714a00f27..cee72ce642221e816968cb81069407fe01edb138 100644 (file)
@@ -1667,6 +1667,9 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
        case KEYCTL_INVALIDATE:
                return keyctl_invalidate_key((key_serial_t) arg2);
 
+       case KEYCTL_GET_PERSISTENT:
+               return keyctl_get_persistent((uid_t)arg2, (key_serial_t)arg3);
+
        default:
                return -EOPNOTSUPP;
        }
index 6ece7f2e5707f45c2736ca4a05504c2dd391ea00..9b6f6e09b50ccdb82b55d88b493c41fd9920c66a 100644 (file)
@@ -1,6 +1,6 @@
 /* Keyring handling
  *
- * Copyright (C) 2004-2005, 2008 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2004-2005, 2008, 2013 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
 #include <linux/seq_file.h>
 #include <linux/err.h>
 #include <keys/keyring-type.h>
+#include <keys/user-type.h>
+#include <linux/assoc_array_priv.h>
 #include <linux/uaccess.h>
 #include "internal.h"
 
-#define rcu_dereference_locked_keyring(keyring)                                \
-       (rcu_dereference_protected(                                     \
-               (keyring)->payload.subscriptions,                       \
-               rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem)))
-
-#define rcu_deref_link_locked(klist, index, keyring)                   \
-       (rcu_dereference_protected(                                     \
-               (klist)->keys[index],                                   \
-               rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem)))
-
-#define MAX_KEYRING_LINKS                                              \
-       min_t(size_t, USHRT_MAX - 1,                                    \
-             ((PAGE_SIZE - sizeof(struct keyring_list)) / sizeof(struct key *)))
-
-#define KEY_LINK_FIXQUOTA 1UL
-
 /*
  * When plumbing the depths of the key tree, this sets a hard limit
  * set on how deep we're willing to go.
  */
 #define KEYRING_NAME_HASH_SIZE (1 << 5)
 
+/*
+ * We mark pointers we pass to the associative array with bit 1 set if
+ * they're keyrings and clear otherwise.
+ */
+#define KEYRING_PTR_SUBTYPE    0x2UL
+
+/* Test whether an array pointer carries the keyring subtype mark (bit 1). */
+static inline bool keyring_ptr_is_keyring(const struct assoc_array_ptr *x)
+{
+       unsigned long bits = (unsigned long)x;
+
+       return (bits & KEYRING_PTR_SUBTYPE) != 0;
+}
+/* Recover the key from a leaf pointer, masking off the keyring subtype mark. */
+static inline struct key *keyring_ptr_to_key(const struct assoc_array_ptr *x)
+{
+       unsigned long leaf = (unsigned long)assoc_array_ptr_to_leaf(x);
+
+       leaf &= ~KEYRING_PTR_SUBTYPE;
+       return (struct key *)leaf;
+}
+/* Build the pointer handed to the array for @key: keyrings get the subtype
+ * mark set, every other key type is passed through untouched.
+ */
+static inline void *keyring_key_to_ptr(struct key *key)
+{
+       unsigned long ptr = (unsigned long)key;
+
+       if (key->type == &key_type_keyring)
+               ptr |= KEYRING_PTR_SUBTYPE;
+       return (void *)ptr;
+}
+
 static struct list_head        keyring_name_hash[KEYRING_NAME_HASH_SIZE];
 static DEFINE_RWLOCK(keyring_name_lock);
 
@@ -67,7 +75,6 @@ static inline unsigned keyring_hash(const char *desc)
  */
 static int keyring_instantiate(struct key *keyring,
                               struct key_preparsed_payload *prep);
-static int keyring_match(const struct key *keyring, const void *criterion);
 static void keyring_revoke(struct key *keyring);
 static void keyring_destroy(struct key *keyring);
 static void keyring_describe(const struct key *keyring, struct seq_file *m);
@@ -76,9 +83,9 @@ static long keyring_read(const struct key *keyring,
 
 struct key_type key_type_keyring = {
        .name           = "keyring",
-       .def_datalen    = sizeof(struct keyring_list),
+       .def_datalen    = 0,
        .instantiate    = keyring_instantiate,
-       .match          = keyring_match,
+       .match          = user_match,
        .revoke         = keyring_revoke,
        .destroy        = keyring_destroy,
        .describe       = keyring_describe,
@@ -127,6 +134,7 @@ static int keyring_instantiate(struct key *keyring,
 
        ret = -EINVAL;
        if (prep->datalen == 0) {
+               assoc_array_init(&keyring->keys);
                /* make the keyring available by name if it has one */
                keyring_publish_name(keyring);
                ret = 0;
@@ -136,14 +144,225 @@ static int keyring_instantiate(struct key *keyring,
 }
 
 /*
- * Match keyrings on their name
+ * Multiply 64-bits by 32-bits to 96-bits and fold back to 64-bit.  Ideally we'd
+ * fold the carry back too, but that requires inline asm.
+ */
+static u64 mult_64x32_and_fold(u64 x, u32 y)
+{
+       /* The two 32x32->64 partial products that make up the 96-bit result */
+       u64 upper = (u64)(u32)(x >> 32) * y;
+       u64 lower = (u64)(u32)(x) * y;
+       u64 folded = lower;
+
+       /* The low word of the upper product belongs in bits 32-63 ... */
+       folded += (u64)(u32)upper << 32;
+       /* ... and its high word (bits 64-95) is folded back onto the bottom */
+       folded += (u32)(upper >> 32);
+       return folded;
+}
+
+/*
+ * Hash a key type and description.
  */
-static int keyring_match(const struct key *keyring, const void *description)
+static unsigned long hash_key_type_and_desc(const struct keyring_index_key *index_key)
 {
-       return keyring->description &&
-               strcmp(keyring->description, description) == 0;
+       const unsigned level_shift = ASSOC_ARRAY_LEVEL_STEP;
+       /* Mask covering one complete level_shift-bit segment of the hash.  It
+        * is derived from the shift so that the whole bottom segment is tested
+        * and cleared below rather than only some of its bits.
+        */
+       const unsigned long level_mask = (1UL << ASSOC_ARRAY_LEVEL_STEP) - 1;
+       const char *description = index_key->description;
+       unsigned long hash, type;
+       u32 piece;
+       u64 acc;
+       int n, desc_len = index_key->desc_len;
+
+       type = (unsigned long)index_key->type;
+
+       /* Seed the accumulator from the type pointer and description length */
+       acc = mult_64x32_and_fold(type, desc_len + 13);
+       acc = mult_64x32_and_fold(acc, 9207);
+       /* Mix in the description up to four bytes at a time; a short final
+        * piece is zero-padded.
+        */
+       for (;;) {
+               n = desc_len;
+               if (n <= 0)
+                       break;
+               if (n > 4)
+                       n = 4;
+               piece = 0;
+               memcpy(&piece, description, n);
+               description += n;
+               desc_len -= n;
+               acc = mult_64x32_and_fold(acc, piece);
+               acc = mult_64x32_and_fold(acc, 9207);
+       }
+
+       /* Fold the hash down to 32 bits if need be. */
+       hash = acc;
+       if (ASSOC_ARRAY_KEY_CHUNK_SIZE == 32)
+               hash ^= acc >> 32;
+
+       /* Squidge all the keyrings into a separate part of the tree to
+        * ordinary keys by making sure the lowest level segment in the hash is
+        * zero for keyrings and non-zero otherwise.
+        */
+       if (index_key->type != &key_type_keyring && (hash & level_mask) == 0)
+               return hash | (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1;
+       if (index_key->type == &key_type_keyring && (hash & level_mask) != 0)
+               return (hash + (hash << level_shift)) & ~level_mask;
+       return hash;
 }
 
+/*
+ * Build the next index key chunk.
+ *
+ * On 32-bit systems the index key is laid out as:
+ *
+ *     0       4       5       9...
+ *     hash    desclen typeptr desc[]
+ *
+ * On 64-bit systems:
+ *
+ *     0       8       9       17...
+ *     hash    desclen typeptr desc[]
+ *
+ * We return it one word-sized chunk at a time.
+ */
+static unsigned long keyring_get_key_chunk(const void *data, int level)
+{
+       const struct keyring_index_key *index_key = data;
+       unsigned long chunk = 0;
+       long offset = 0;
+       int desc_len = index_key->desc_len, n = sizeof(chunk);
+
+       /* Turn the level passed in into a chunk number */
+       level /= ASSOC_ARRAY_KEY_CHUNK_SIZE;
+       switch (level) {
+       case 0:
+               /* Chunk 0 is the hash of type and description */
+               return hash_key_type_and_desc(index_key);
+       case 1:
+               /* Chunk 1 is desc_len in the bottom byte with the type
+                * pointer shifted up above it; the pointer's top byte is
+                * pushed out and reappears at the bottom of chunk 2.
+                */
+               return ((unsigned long)index_key->type << 8) | desc_len;
+       case 2:
+               if (desc_len == 0)
+                       return (u8)((unsigned long)index_key->type >>
+                                   (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8));
+               /* One byte of this chunk is taken by the type pointer, so it
+                * has room for one description byte fewer.
+                */
+               n--;
+               offset = 1;
+               /* fall through - the description bytes are gathered below */
+       default:
+               /* Work out where in the description this chunk starts; for
+                * chunk 2 this comes to 0.
+                */
+               offset += sizeof(chunk) - 1;
+               offset += (level - 3) * sizeof(chunk);
+               if (offset >= desc_len)
+                       return 0;
+               desc_len -= offset;
+               if (desc_len > n)
+                       desc_len = n;
+               /* Gather the bytes from last to first so that the earliest
+                * description byte ends up in the least significant position.
+                */
+               offset += desc_len;
+               do {
+                       chunk <<= 8;
+                       chunk |= ((u8*)index_key->description)[--offset];
+               } while (--desc_len > 0);
+
+               if (level == 2) {
+                       /* Slot the top byte of the type pointer in beneath
+                        * the description bytes.
+                        */
+                       chunk <<= 8;
+                       chunk |= (u8)((unsigned long)index_key->type >>
+                                     (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8));
+               }
+               return chunk;
+       }
+}
+
+/* Fetch an index-key chunk for an object that is stored in the array. */
+static unsigned long keyring_get_object_key_chunk(const void *object, int level)
+{
+       const struct keyring_index_key *index_key;
+
+       index_key = &keyring_ptr_to_key(object)->index_key;
+       return keyring_get_key_chunk(index_key, level);
+}
+
+/* Decide whether a stored object has exactly the index key given in @data:
+ * same type, same description length and same description bytes.
+ */
+static bool keyring_compare_object(const void *object, const void *data)
+{
+       const struct keyring_index_key *wanted = data;
+       const struct keyring_index_key *have =
+               &keyring_ptr_to_key(object)->index_key;
+
+       if (have->type != wanted->type)
+               return false;
+       if (have->desc_len != wanted->desc_len)
+               return false;
+       return memcmp(have->description, wanted->description,
+                     wanted->desc_len) == 0;
+}
+
+/*
+ * Compare the index keys of a pair of objects and determine the bit position
+ * at which they differ - if they differ.
+ */
+static int keyring_diff_objects(const void *_a, const void *_b)
+{
+       const struct key *key_a = keyring_ptr_to_key(_a);
+       const struct key *key_b = keyring_ptr_to_key(_b);
+       const struct keyring_index_key *a = &key_a->index_key;
+       const struct keyring_index_key *b = &key_b->index_key;
+       unsigned long seg_a, seg_b;
+       int level, i;
+
+       /* level counts bytes into the index key; it is turned into a bit
+        * position at the end.  Returns -1 if the two index keys are the same.
+        */
+       level = 0;
+       seg_a = hash_key_type_and_desc(a);
+       seg_b = hash_key_type_and_desc(b);
+       if ((seg_a ^ seg_b) != 0)
+               goto differ;
+
+       /* The number of bits contributed by the hash is controlled by a
+        * constant in the assoc_array headers.  Everything else thereafter we
+        * can deal with as being machine word-size dependent.
+        */
+       level += ASSOC_ARRAY_KEY_CHUNK_SIZE / 8;
+       seg_a = a->desc_len;
+       seg_b = b->desc_len;
+       if ((seg_a ^ seg_b) != 0)
+               goto differ;
+
+       /* The next bit may not work on big endian */
+       level++;
+       seg_a = (unsigned long)a->type;
+       seg_b = (unsigned long)b->type;
+       if ((seg_a ^ seg_b) != 0)
+               goto differ;
+
+       level += sizeof(unsigned long);
+
+       /* The lengths compared equal above, so walk the descriptions a byte
+        * at a time.  This never reads beyond desc_len bytes, never makes a
+        * misaligned word access and gives the same answer whatever the
+        * endianness.  An empty description simply skips the loop.
+        */
+       for (i = 0; i < a->desc_len; i++) {
+               seg_a = *(unsigned char *)(a->description + i);
+               seg_b = *(unsigned char *)(b->description + i);
+               if ((seg_a ^ seg_b) != 0)
+                       goto differ_plus_i;
+       }
+
+       return -1;
+
+differ_plus_i:
+       level += i;
+differ:
+       i = level * 8 + __ffs(seg_a ^ seg_b);
+       return i;
+}
+
+/*
+ * Free an object after stripping the keyring flag off of the pointer.
+ */
+static void keyring_free_object(void *object)
+{
+       struct key *key = keyring_ptr_to_key(object);
+
+       /* Drop the reference that was held through the array entry */
+       key_put(key);
+}
+
+/*
+ * Operations for keyring management by the index-tree routines.
+ */
+static const struct assoc_array_ops keyring_assoc_array_ops = {
+       /* chunk of an index key supplied as a struct keyring_index_key */
+       .get_key_chunk          = keyring_get_key_chunk,
+       /* chunk of the index key belonging to a stored key */
+       .get_object_key_chunk   = keyring_get_object_key_chunk,
+       /* does a stored key have exactly the given index key? */
+       .compare_object         = keyring_compare_object,
+       /* bit position at which two stored keys' index keys differ, or -1 */
+       .diff_objects           = keyring_diff_objects,
+       /* key_put() a stored key */
+       .free_object            = keyring_free_object,
+};
+
 /*
  * Clean up a keyring when it is destroyed.  Unpublish its name if it had one
  * and dispose of its data.
@@ -155,9 +374,6 @@ static int keyring_match(const struct key *keyring, const void *description)
  */
 static void keyring_destroy(struct key *keyring)
 {
-       struct keyring_list *klist;
-       int loop;
-
        if (keyring->description) {
                write_lock(&keyring_name_lock);
 
@@ -168,12 +384,7 @@ static void keyring_destroy(struct key *keyring)
                write_unlock(&keyring_name_lock);
        }
 
-       klist = rcu_access_pointer(keyring->payload.subscriptions);
-       if (klist) {
-               for (loop = klist->nkeys - 1; loop >= 0; loop--)
-                       key_put(rcu_access_pointer(klist->keys[loop]));
-               kfree(klist);
-       }
+       assoc_array_destroy(&keyring->keys, &keyring_assoc_array_ops);
 }
 
 /*
@@ -181,76 +392,88 @@ static void keyring_destroy(struct key *keyring)
  */
 static void keyring_describe(const struct key *keyring, struct seq_file *m)
 {
-       struct keyring_list *klist;
-
        if (keyring->description)
                seq_puts(m, keyring->description);
        else
                seq_puts(m, "[anon]");
 
+       /* For an instantiated keyring, append the number of leaves in its
+        * tree, or "empty" if there are none.
+        */
        if (key_is_instantiated(keyring)) {
-               rcu_read_lock();
-               klist = rcu_dereference(keyring->payload.subscriptions);
-               if (klist)
-                       seq_printf(m, ": %u/%u", klist->nkeys, klist->maxkeys);
+               if (keyring->keys.nr_leaves_on_tree != 0)
+                       seq_printf(m, ": %lu", keyring->keys.nr_leaves_on_tree);
                else
                        seq_puts(m, ": empty");
-               rcu_read_unlock();
        }
 }
 
+/* State shared between keyring_read() and keyring_read_iterator(). */
+struct keyring_read_iterator_context {
+       size_t                  qty;            /* byte count at which the iterator stops storing */
+       size_t                  count;          /* bytes stored so far */
+       key_serial_t __user     *buffer;        /* where the next serial number goes */
+};
+
+/* Per-key callback for keyring_read(): store one key's serial number in the
+ * user buffer and advance the cursor.  Gives 1 once the byte limit has been
+ * reached, a negative error if put_user() fails and 0 otherwise.
+ */
+static int keyring_read_iterator(const void *object, void *data)
+{
+       const struct key *key = keyring_ptr_to_key(object);
+       struct keyring_read_iterator_context *rd = data;
+       int err;
+
+       kenter("{%s,%d},,{%zu/%zu}",
+              key->type->name, key->serial, rd->count, rd->qty);
+
+       /* Limit reached: report 1 rather than storing another ID */
+       if (rd->count >= rd->qty)
+               return 1;
+
+       err = put_user(key->serial, rd->buffer);
+       if (err < 0)
+               return err;
+
+       rd->count += sizeof(key->serial);
+       rd->buffer++;
+       return 0;
+}
+
 /*
  * Read a list of key IDs from the keyring's contents in binary form
  *
- * The keyring's semaphore is read-locked by the caller.
+ * The keyring's semaphore is read-locked by the caller.  This prevents someone
+ * from modifying it under us - which could cause us to read key IDs multiple
+ * times.
  */
 static long keyring_read(const struct key *keyring,
                         char __user *buffer, size_t buflen)
 {
-       struct keyring_list *klist;
-       struct key *key;
-       size_t qty, tmp;
-       int loop, ret;
+       struct keyring_read_iterator_context ctx;
+       unsigned long nr_keys;
+       int ret;
 
-       ret = 0;
-       klist = rcu_dereference_locked_keyring(keyring);
-       if (klist) {
-               /* calculate how much data we could return */
-               qty = klist->nkeys * sizeof(key_serial_t);
-
-               if (buffer && buflen > 0) {
-                       if (buflen > qty)
-                               buflen = qty;
-
-                       /* copy the IDs of the subscribed keys into the
-                        * buffer */
-                       ret = -EFAULT;
-
-                       for (loop = 0; loop < klist->nkeys; loop++) {
-                               key = rcu_deref_link_locked(klist, loop,
-                                                           keyring);
-
-                               tmp = sizeof(key_serial_t);
-                               if (tmp > buflen)
-                                       tmp = buflen;
-
-                               if (copy_to_user(buffer,
-                                                &key->serial,
-                                                tmp) != 0)
-                                       goto error;
-
-                               buflen -= tmp;
-                               if (buflen == 0)
-                                       break;
-                               buffer += tmp;
-                       }
-               }
+       kenter("{%d},,%zu", key_serial(keyring), buflen);
 
-               ret = qty;
+       if (buflen & (sizeof(key_serial_t) - 1))
+               return -EINVAL;
+
+       nr_keys = keyring->keys.nr_leaves_on_tree;
+       if (nr_keys == 0)
+               return 0;
+
+       /* Calculate how much data we could return */
+       ctx.qty = nr_keys * sizeof(key_serial_t);
+
+       if (!buffer || !buflen)
+               return ctx.qty;
+
+       if (buflen > ctx.qty)
+               ctx.qty = buflen;
+
+       /* Copy the IDs of the subscribed keys into the buffer */
+       ctx.buffer = (key_serial_t __user *)buffer;
+       ctx.count = 0;
+       ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx);
+       if (ret < 0) {
+               kleave(" = %d [iterate]", ret);
+               return ret;
        }
 
-error:
-       return ret;
+       kleave(" = %zu [ok]", ctx.count);
+       return ctx.count;
 }
 
 /*
@@ -277,227 +500,360 @@ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid,
 }
 EXPORT_SYMBOL(keyring_alloc);
 
-/**
- * keyring_search_aux - Search a keyring tree for a key matching some criteria
- * @keyring_ref: A pointer to the keyring with possession indicator.
- * @cred: The credentials to use for permissions checks.
- * @type: The type of key to search for.
- * @description: Parameter for @match.
- * @match: Function to rule on whether or not a key is the one required.
- * @no_state_check: Don't check if a matching key is bad
- *
- * Search the supplied keyring tree for a key that matches the criteria given.
- * The root keyring and any linked keyrings must grant Search permission to the
- * caller to be searchable and keys can only be found if they too grant Search
- * to the caller. The possession flag on the root keyring pointer controls use
- * of the possessor bits in permissions checking of the entire tree.  In
- * addition, the LSM gets to forbid keyring searches and key matches.
- *
- * The search is performed as a breadth-then-depth search up to the prescribed
- * limit (KEYRING_SEARCH_MAX_DEPTH).
- *
- * Keys are matched to the type provided and are then filtered by the match
- * function, which is given the description to use in any way it sees fit.  The
- * match function may use any attributes of a key that it wishes to to
- * determine the match.  Normally the match function from the key type would be
- * used.
- *
- * RCU is used to prevent the keyring key lists from disappearing without the
- * need to take lots of locks.
- *
- * Returns a pointer to the found key and increments the key usage count if
- * successful; -EAGAIN if no matching keys were found, or if expired or revoked
- * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the
- * specified keyring wasn't a keyring.
- *
- * In the case of a successful return, the possession attribute from
- * @keyring_ref is propagated to the returned key reference.
+/*
+ * Iteration function to consider each key found.
  */
-key_ref_t keyring_search_aux(key_ref_t keyring_ref,
-                            const struct cred *cred,
-                            struct key_type *type,
-                            const void *description,
-                            key_match_func_t match,
-                            bool no_state_check)
+static int keyring_search_iterator(const void *object, void *iterator_data)
 {
-       struct {
-               /* Need a separate keylist pointer for RCU purposes */
-               struct key *keyring;
-               struct keyring_list *keylist;
-               int kix;
-       } stack[KEYRING_SEARCH_MAX_DEPTH];
+       /* Returns 1 with ctx->result set to the key reference on a match, 0
+        * for a key that is simply not a candidate, and ctx->skipped_ret
+        * (with an error stored in ctx->result) for a candidate that was
+        * rejected.
+        */
+       struct keyring_search_context *ctx = iterator_data;
+       const struct key *key = keyring_ptr_to_key(object);
+       unsigned long kflags = key->flags;
 
-       struct keyring_list *keylist;
-       struct timespec now;
-       unsigned long possessed, kflags;
-       struct key *keyring, *key;
-       key_ref_t key_ref;
-       long err;
-       int sp, nkeys, kix;
-
-       keyring = key_ref_to_ptr(keyring_ref);
-       possessed = is_key_possessed(keyring_ref);
-       key_check(keyring);
+       kenter("{%d}", key->serial);
 
-       /* top keyring must have search permission to begin the search */
-       err = key_task_permission(keyring_ref, cred, KEY_SEARCH);
-       if (err < 0) {
-               key_ref = ERR_PTR(err);
-               goto error;
+       /* ignore keys not of this type */
+       if (key->type != ctx->index_key.type) {
+               kleave(" = 0 [!type]");
+               return 0;
        }
 
-       key_ref = ERR_PTR(-ENOTDIR);
-       if (keyring->type != &key_type_keyring)
-               goto error;
+       /* skip invalidated, revoked and expired keys */
+       if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) {
+               if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
+                             (1 << KEY_FLAG_REVOKED))) {
+                       ctx->result = ERR_PTR(-EKEYREVOKED);
+                       kleave(" = %d [invrev]", ctx->skipped_ret);
+                       goto skipped;
+               }
 
-       rcu_read_lock();
+               if (key->expiry && ctx->now.tv_sec >= key->expiry) {
+                       ctx->result = ERR_PTR(-EKEYEXPIRED);
+                       kleave(" = %d [expire]", ctx->skipped_ret);
+                       goto skipped;
+               }
+       }
 
-       now = current_kernel_time();
-       err = -EAGAIN;
-       sp = 0;
-
-       /* firstly we should check to see if this top-level keyring is what we
-        * are looking for */
-       key_ref = ERR_PTR(-EAGAIN);
-       kflags = keyring->flags;
-       if (keyring->type == type && match(keyring, description)) {
-               key = keyring;
-               if (no_state_check)
-                       goto found;
+       /* keys that don't match */
+       if (!ctx->match(key, ctx->match_data)) {
+               kleave(" = 0 [!match]");
+               return 0;
+       }
 
-               /* check it isn't negative and hasn't expired or been
-                * revoked */
-               if (kflags & (1 << KEY_FLAG_REVOKED))
-                       goto error_2;
-               if (key->expiry && now.tv_sec >= key->expiry)
-                       goto error_2;
-               key_ref = ERR_PTR(key->type_data.reject_error);
-               if (kflags & (1 << KEY_FLAG_NEGATIVE))
-                       goto error_2;
-               goto found;
+       /* key must have search permissions */
+       if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) &&
+           key_task_permission(make_key_ref(key, ctx->possessed),
+                               ctx->cred, KEY_SEARCH) < 0) {
+               ctx->result = ERR_PTR(-EACCES);
+               kleave(" = %d [!perm]", ctx->skipped_ret);
+               goto skipped;
        }
 
-       /* otherwise, the top keyring must not be revoked, expired, or
-        * negatively instantiated if we are to search it */
-       key_ref = ERR_PTR(-EAGAIN);
-       if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
-                     (1 << KEY_FLAG_REVOKED) |
-                     (1 << KEY_FLAG_NEGATIVE)) ||
-           (keyring->expiry && now.tv_sec >= keyring->expiry))
-               goto error_2;
-
-       /* start processing a new keyring */
-descend:
-       kflags = keyring->flags;
-       if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
-                     (1 << KEY_FLAG_REVOKED)))
-               goto not_this_keyring;
+       if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) {
+               /* we set a different error code if we pass a negative key */
+               if (kflags & (1 << KEY_FLAG_NEGATIVE)) {
+                       ctx->result = ERR_PTR(key->type_data.reject_error);
+                       kleave(" = %d [neg]", ctx->skipped_ret);
+                       goto skipped;
+               }
+       }
 
-       keylist = rcu_dereference(keyring->payload.subscriptions);
-       if (!keylist)
-               goto not_this_keyring;
+       /* Found */
+       ctx->result = make_key_ref(key, ctx->possessed);
+       kleave(" = 1 [found]");
+       return 1;
 
-       /* iterate through the keys in this keyring first */
-       nkeys = keylist->nkeys;
-       smp_rmb();
-       for (kix = 0; kix < nkeys; kix++) {
-               key = rcu_dereference(keylist->keys[kix]);
-               kflags = key->flags;
+       /* The caller chooses, through ctx->skipped_ret, what value a rejected
+        * candidate yields.
+        */
+skipped:
+       return ctx->skipped_ret;
+}
 
-               /* ignore keys not of this type */
-               if (key->type != type)
-                       continue;
+/*
+ * Search inside a keyring for a key.  We can search by walking to it
+ * directly based on its index-key or we can iterate over the entire
+ * tree looking for it, based on the match function.
+ */
+static int search_keyring(struct key *keyring, struct keyring_search_context *ctx)
+{
+       const void *object;
+
+       /* Anything other than a direct lookup walks every object in the tree */
+       if ((ctx->flags & KEYRING_SEARCH_LOOKUP_TYPE) !=
+           KEYRING_SEARCH_LOOKUP_DIRECT)
+               return assoc_array_iterate(&keyring->keys, ctx->iterator, ctx);
+
+       /* Direct lookup: go straight to the object with this index key and
+        * let the iterator function rule on it.
+        */
+       object = assoc_array_find(&keyring->keys,
+                                 &keyring_assoc_array_ops,
+                                 &ctx->index_key);
+       if (!object)
+               return 0;
+       return ctx->iterator(object, ctx);
+}
 
-               /* skip invalidated, revoked and expired keys */
-               if (!no_state_check) {
-                       if (kflags & ((1 << KEY_FLAG_INVALIDATED) |
-                                     (1 << KEY_FLAG_REVOKED)))
-                               continue;
+/*
+ * Search a tree of keyrings that point to other keyrings up to the maximum
+ * depth.
+ */
+static bool search_nested_keyrings(struct key *keyring,
+                                  struct keyring_search_context *ctx)
+{
+       struct {
+               struct key *keyring;
+               struct assoc_array_node *node;
+               int slot;
+       } stack[KEYRING_SEARCH_MAX_DEPTH];
 
-                       if (key->expiry && now.tv_sec >= key->expiry)
-                               continue;
-               }
+       struct assoc_array_shortcut *shortcut;
+       struct assoc_array_node *node;
+       struct assoc_array_ptr *ptr;
+       struct key *key;
+       int sp = 0, slot;
 
-               /* keys that don't match */
-               if (!match(key, description))
-                       continue;
+       kenter("{%d},{%s,%s}",
+              keyring->serial,
+              ctx->index_key.type->name,
+              ctx->index_key.description);
 
-               /* key must have search permissions */
-               if (key_task_permission(make_key_ref(key, possessed),
-                                       cred, KEY_SEARCH) < 0)
-                       continue;
+       if (ctx->index_key.description)
+               ctx->index_key.desc_len = strlen(ctx->index_key.description);
 
-               if (no_state_check)
+       /* Check to see if this top-level keyring is what we are looking for
+        * and whether it is valid or not.
+        */
+       if (ctx->flags & KEYRING_SEARCH_LOOKUP_ITERATE ||
+           keyring_compare_object(keyring, &ctx->index_key)) {
+               ctx->skipped_ret = 2;
+               ctx->flags |= KEYRING_SEARCH_DO_STATE_CHECK;
+               switch (ctx->iterator(keyring_key_to_ptr(keyring), ctx)) {
+               case 1:
                        goto found;
-
-               /* we set a different error code if we pass a negative key */
-               if (kflags & (1 << KEY_FLAG_NEGATIVE)) {
-                       err = key->type_data.reject_error;
-                       continue;
+               case 2:
+                       return false;
+               default:
+                       break;
                }
+       }
 
+       ctx->skipped_ret = 0;
+       if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)
+               ctx->flags &= ~KEYRING_SEARCH_DO_STATE_CHECK;
+
+       /* Start processing a new keyring */
+descend_to_keyring:
+       kdebug("descend to %d", keyring->serial);
+       if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) |
+                             (1 << KEY_FLAG_REVOKED)))
+               goto not_this_keyring;
+
+       /* Search through the keys in this keyring before searching its
+        * subtrees.
+        */
+       if (search_keyring(keyring, ctx))
                goto found;
-       }
 
-       /* search through the keyrings nested in this one */
-       kix = 0;
-ascend:
-       nkeys = keylist->nkeys;
-       smp_rmb();
-       for (; kix < nkeys; kix++) {
-               key = rcu_dereference(keylist->keys[kix]);
-               if (key->type != &key_type_keyring)
-                       continue;
+       /* Then manually iterate through the keyrings nested in this one.
+        *
+        * Start from the root node of the index tree.  Because of the way the
+        * hash function has been set up, keyrings cluster on the leftmost
+        * branch of the root node (root slot 0) or in the root node itself.
+        * Non-keyrings avoid the leftmost branch of the root entirely (root
+        * slots 1-15).
+        */
+       ptr = ACCESS_ONCE(keyring->keys.root);
+       if (!ptr)
+               goto not_this_keyring;
 
-               /* recursively search nested keyrings
-                * - only search keyrings for which we have search permission
+       if (assoc_array_ptr_is_shortcut(ptr)) {
+               /* If the root is a shortcut, either the keyring only contains
+                * keyring pointers (everything clusters behind root slot 0) or
+                * doesn't contain any keyring pointers.
                 */
-               if (sp >= KEYRING_SEARCH_MAX_DEPTH)
+               shortcut = assoc_array_ptr_to_shortcut(ptr);
+               smp_read_barrier_depends();
+               if ((shortcut->index_key[0] & ASSOC_ARRAY_FAN_MASK) != 0)
+                       goto not_this_keyring;
+
+               ptr = ACCESS_ONCE(shortcut->next_node);
+               node = assoc_array_ptr_to_node(ptr);
+               goto begin_node;
+       }
+
+       node = assoc_array_ptr_to_node(ptr);
+       smp_read_barrier_depends();
+
+       ptr = node->slots[0];
+       if (!assoc_array_ptr_is_meta(ptr))
+               goto begin_node;
+
+descend_to_node:
+       /* Descend to a more distal node in this keyring's content tree and go
+        * through that.
+        */
+       kdebug("descend");
+       if (assoc_array_ptr_is_shortcut(ptr)) {
+               shortcut = assoc_array_ptr_to_shortcut(ptr);
+               smp_read_barrier_depends();
+               ptr = ACCESS_ONCE(shortcut->next_node);
+               BUG_ON(!assoc_array_ptr_is_node(ptr));
+               node = assoc_array_ptr_to_node(ptr);
+       }
+
+begin_node:
+       kdebug("begin_node");
+       smp_read_barrier_depends();
+       slot = 0;
+ascend_to_node:
+       /* Go through the slots in a node */
+       for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
+               ptr = ACCESS_ONCE(node->slots[slot]);
+
+               if (assoc_array_ptr_is_meta(ptr) && node->back_pointer)
+                       goto descend_to_node;
+
+               if (!keyring_ptr_is_keyring(ptr))
                        continue;
 
-               if (key_task_permission(make_key_ref(key, possessed),
-                                       cred, KEY_SEARCH) < 0)
+               key = keyring_ptr_to_key(ptr);
+
+               if (sp >= KEYRING_SEARCH_MAX_DEPTH) {
+                       if (ctx->flags & KEYRING_SEARCH_DETECT_TOO_DEEP) {
+                               ctx->result = ERR_PTR(-ELOOP);
+                               return false;
+                       }
+                       goto not_this_keyring;
+               }
+
+               /* Search a nested keyring */
+               if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) &&
+                   key_task_permission(make_key_ref(key, ctx->possessed),
+                                       ctx->cred, KEY_SEARCH) < 0)
                        continue;
 
                /* stack the current position */
                stack[sp].keyring = keyring;
-               stack[sp].keylist = keylist;
-               stack[sp].kix = kix;
+               stack[sp].node = node;
+               stack[sp].slot = slot;
                sp++;
 
                /* begin again with the new keyring */
                keyring = key;
-               goto descend;
+               goto descend_to_keyring;
+       }
+
+       /* We've dealt with all the slots in the current node, so now we need
+        * to ascend to the parent and continue processing there.
+        */
+       ptr = ACCESS_ONCE(node->back_pointer);
+       slot = node->parent_slot;
+
+       if (ptr && assoc_array_ptr_is_shortcut(ptr)) {
+               shortcut = assoc_array_ptr_to_shortcut(ptr);
+               smp_read_barrier_depends();
+               ptr = ACCESS_ONCE(shortcut->back_pointer);
+               slot = shortcut->parent_slot;
+       }
+       if (!ptr)
+               goto not_this_keyring;
+       node = assoc_array_ptr_to_node(ptr);
+       smp_read_barrier_depends();
+       slot++;
+
+       /* If we've ascended to the root (zero backpointer), we must have just
+        * finished processing the leftmost branch rather than the root slots -
+        * so there can't be any more keyrings for us to find.
+        */
+       if (node->back_pointer) {
+               kdebug("ascend %d", slot);
+               goto ascend_to_node;
        }
 
-       /* the keyring we're looking at was disqualified or didn't contain a
-        * matching key */
+       /* The keyring we're looking at was disqualified or didn't contain a
+        * matching key.
+        */
 not_this_keyring:
-       if (sp > 0) {
-               /* resume the processing of a keyring higher up in the tree */
-               sp--;
-               keyring = stack[sp].keyring;
-               keylist = stack[sp].keylist;
-               kix = stack[sp].kix + 1;
-               goto ascend;
+       kdebug("not_this_keyring %d", sp);
+       if (sp <= 0) {
+               kleave(" = false");
+               return false;
        }
 
-       key_ref = ERR_PTR(err);
-       goto error_2;
+       /* Resume the processing of a keyring higher up in the tree */
+       sp--;
+       keyring = stack[sp].keyring;
+       node = stack[sp].node;
+       slot = stack[sp].slot + 1;
+       kdebug("ascend to %d [%d]", keyring->serial, slot);
+       goto ascend_to_node;
 
-       /* we found a viable match */
+       /* We found a viable match */
 found:
-       atomic_inc(&key->usage);
-       key->last_used_at = now.tv_sec;
-       keyring->last_used_at = now.tv_sec;
-       while (sp > 0)
-               stack[--sp].keyring->last_used_at = now.tv_sec;
+       key = key_ref_to_ptr(ctx->result);
        key_check(key);
-       key_ref = make_key_ref(key, possessed);
-error_2:
+       if (!(ctx->flags & KEYRING_SEARCH_NO_UPDATE_TIME)) {
+               key->last_used_at = ctx->now.tv_sec;
+               keyring->last_used_at = ctx->now.tv_sec;
+               while (sp > 0)
+                       stack[--sp].keyring->last_used_at = ctx->now.tv_sec;
+       }
+       kleave(" = true");
+       return true;
+}
+
+/**
+ * keyring_search_aux - Search a keyring tree for a key matching some criteria
+ * @keyring_ref: A pointer to the keyring with possession indicator.
+ * @ctx: The keyring search context.
+ *
+ * Search the supplied keyring tree for a key that matches the criteria given.
+ * The root keyring and any linked keyrings must grant Search permission to the
+ * caller to be searchable and keys can only be found if they too grant Search
+ * to the caller. The possession flag on the root keyring pointer controls use
+ * of the possessor bits in permissions checking of the entire tree.  In
+ * addition, the LSM gets to forbid keyring searches and key matches.
+ *
+ * The search is performed as a breadth-then-depth search up to the prescribed
+ * limit (KEYRING_SEARCH_MAX_DEPTH).
+ *
+ * Keys are matched to the type provided and are then filtered by the match
+ * function, which is given the description to use in any way it sees fit.  The
+ * match function may use any attributes of a key that it wishes to to
+ * determine the match.  Normally the match function from the key type would be
+ * used.
+ *
+ * RCU can be used to prevent the keyring key lists from disappearing without
+ * the need to take lots of locks.
+ *
+ * Returns a pointer to the found key and increments the key usage count if
+ * successful; -EAGAIN if no matching keys were found, or if expired or revoked
+ * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the
+ * specified keyring wasn't a keyring.
+ *
+ * In the case of a successful return, the possession attribute from
+ * @keyring_ref is propagated to the returned key reference.
+ */
+key_ref_t keyring_search_aux(key_ref_t keyring_ref,
+                            struct keyring_search_context *ctx)
+{
+       struct key *keyring;
+       long err;
+
+       ctx->iterator = keyring_search_iterator;
+       ctx->possessed = is_key_possessed(keyring_ref);
+       ctx->result = ERR_PTR(-EAGAIN);
+
+       keyring = key_ref_to_ptr(keyring_ref);
+       key_check(keyring);
+
+       if (keyring->type != &key_type_keyring)
+               return ERR_PTR(-ENOTDIR);
+
+       if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM)) {
+               err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH);
+               if (err < 0)
+                       return ERR_PTR(err);
+       }
+
+       rcu_read_lock();
+       ctx->now = current_kernel_time();
+       if (search_nested_keyrings(keyring, ctx))
+               __key_get(key_ref_to_ptr(ctx->result));
        rcu_read_unlock();
-error:
-       return key_ref;
+       return ctx->result;
 }
 
 /**
@@ -507,77 +863,73 @@ error:
  * @description: The name of the keyring we want to find.
  *
  * As keyring_search_aux() above, but using the current task's credentials and
- * type's default matching function.
+ * type's default matching function and preferred search method.
  */
 key_ref_t keyring_search(key_ref_t keyring,
                         struct key_type *type,
                         const char *description)
 {
-       if (!type->match)
+       struct keyring_search_context ctx = {
+               .index_key.type         = type,
+               .index_key.description  = description,
+               .cred                   = current_cred(),
+               .match                  = type->match,
+               .match_data             = description,
+               .flags                  = (type->def_lookup_type |
+                                          KEYRING_SEARCH_DO_STATE_CHECK),
+       };
+
+       if (!ctx.match)
                return ERR_PTR(-ENOKEY);
 
-       return keyring_search_aux(keyring, current->cred,
-                                 type, description, type->match, false);
+       return keyring_search_aux(keyring, &ctx);
 }
 EXPORT_SYMBOL(keyring_search);
 
 /*
- * Search the given keyring only (no recursion).
+ * Search the given keyring for a key that might be updated.
  *
  * The caller must guarantee that the keyring is a keyring and that the
- * permission is granted to search the keyring as no check is made here.
- *
- * RCU is used to make it unnecessary to lock the keyring key list here.
+ * permission is granted to modify the keyring as no check is made here.  The
+ * caller must also hold a lock on the keyring semaphore.
  *
  * Returns a pointer to the found key with usage count incremented if
- * successful and returns -ENOKEY if not found.  Revoked keys and keys not
- * providing the requested permission are skipped over.
+ * successful and returns NULL if not found.  Revoked and invalidated keys are
+ * skipped over.
  *
  * If successful, the possession indicator is propagated from the keyring ref
  * to the returned key reference.
  */
-key_ref_t __keyring_search_one(key_ref_t keyring_ref,
-                              const struct key_type *ktype,
-                              const char *description,
-                              key_perm_t perm)
+key_ref_t find_key_to_update(key_ref_t keyring_ref,
+                            const struct keyring_index_key *index_key)
 {
-       struct keyring_list *klist;
-       unsigned long possessed;
        struct key *keyring, *key;
-       int nkeys, loop;
+       const void *object;
 
        keyring = key_ref_to_ptr(keyring_ref);
-       possessed = is_key_possessed(keyring_ref);
 
-       rcu_read_lock();
+       kenter("{%d},{%s,%s}",
+              keyring->serial, index_key->type->name, index_key->description);
 
-       klist = rcu_dereference(keyring->payload.subscriptions);
-       if (klist) {
-               nkeys = klist->nkeys;
-               smp_rmb();
-               for (loop = 0; loop < nkeys ; loop++) {
-                       key = rcu_dereference(klist->keys[loop]);
-                       if (key->type == ktype &&
-                           (!key->type->match ||
-                            key->type->match(key, description)) &&
-                           key_permission(make_key_ref(key, possessed),
-                                          perm) == 0 &&
-                           !(key->flags & ((1 << KEY_FLAG_INVALIDATED) |
-                                           (1 << KEY_FLAG_REVOKED)))
-                           )
-                               goto found;
-               }
-       }
+       object = assoc_array_find(&keyring->keys, &keyring_assoc_array_ops,
+                                 index_key);
 
-       rcu_read_unlock();
-       return ERR_PTR(-ENOKEY);
+       if (object)
+               goto found;
+
+       kleave(" = NULL");
+       return NULL;
 
 found:
-       atomic_inc(&key->usage);
-       keyring->last_used_at = key->last_used_at =
-               current_kernel_time().tv_sec;
-       rcu_read_unlock();
-       return make_key_ref(key, possessed);
+       key = keyring_ptr_to_key(object);
+       if (key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+                         (1 << KEY_FLAG_REVOKED))) {
+               kleave(" = NULL [x]");
+               return NULL;
+       }
+       __key_get(key);
+       kleave(" = {%d}", key->serial);
+       return make_key_ref(key, is_key_possessed(keyring_ref));
 }
 
 /*
@@ -640,6 +992,19 @@ out:
        return keyring;
 }
 
+static int keyring_detect_cycle_iterator(const void *object,
+                                        void *iterator_data)
+{
+       struct keyring_search_context *ctx = iterator_data;
+       const struct key *key = keyring_ptr_to_key(object);
+
+       kenter("{%d}", key->serial);
+
+       BUG_ON(key != ctx->match_data);
+       ctx->result = ERR_PTR(-EDEADLK);
+       return 1;
+}
+
 /*
  * See if a cycle will will be created by inserting acyclic tree B in acyclic
  * tree A at the topmost level (ie: as a direct child of A).
@@ -649,116 +1014,39 @@ out:
  */
 static int keyring_detect_cycle(struct key *A, struct key *B)
 {
-       struct {
-               struct keyring_list *keylist;
-               int kix;
-       } stack[KEYRING_SEARCH_MAX_DEPTH];
-
-       struct keyring_list *keylist;
-       struct key *subtree, *key;
-       int sp, nkeys, kix, ret;
+       struct keyring_search_context ctx = {
+               .index_key      = A->index_key,
+               .match_data     = A,
+               .iterator       = keyring_detect_cycle_iterator,
+               .flags          = (KEYRING_SEARCH_LOOKUP_DIRECT |
+                                  KEYRING_SEARCH_NO_STATE_CHECK |
+                                  KEYRING_SEARCH_NO_UPDATE_TIME |
+                                  KEYRING_SEARCH_NO_CHECK_PERM |
+                                  KEYRING_SEARCH_DETECT_TOO_DEEP),
+       };
 
        rcu_read_lock();
-
-       ret = -EDEADLK;
-       if (A == B)
-               goto cycle_detected;
-
-       subtree = B;
-       sp = 0;
-
-       /* start processing a new keyring */
-descend:
-       if (test_bit(KEY_FLAG_REVOKED, &subtree->flags))
-               goto not_this_keyring;
-
-       keylist = rcu_dereference(subtree->payload.subscriptions);
-       if (!keylist)
-               goto not_this_keyring;
-       kix = 0;
-
-ascend:
-       /* iterate through the remaining keys in this keyring */
-       nkeys = keylist->nkeys;
-       smp_rmb();
-       for (; kix < nkeys; kix++) {
-               key = rcu_dereference(keylist->keys[kix]);
-
-               if (key == A)
-                       goto cycle_detected;
-
-               /* recursively check nested keyrings */
-               if (key->type == &key_type_keyring) {
-                       if (sp >= KEYRING_SEARCH_MAX_DEPTH)
-                               goto too_deep;
-
-                       /* stack the current position */
-                       stack[sp].keylist = keylist;
-                       stack[sp].kix = kix;
-                       sp++;
-
-                       /* begin again with the new keyring */
-                       subtree = key;
-                       goto descend;
-               }
-       }
-
-       /* the keyring we're looking at was disqualified or didn't contain a
-        * matching key */
-not_this_keyring:
-       if (sp > 0) {
-               /* resume the checking of a keyring higher up in the tree */
-               sp--;
-               keylist = stack[sp].keylist;
-               kix = stack[sp].kix + 1;
-               goto ascend;
-       }
-
-       ret = 0; /* no cycles detected */
-
-error:
+       search_nested_keyrings(B, &ctx);
        rcu_read_unlock();
-       return ret;
-
-too_deep:
-       ret = -ELOOP;
-       goto error;
-
-cycle_detected:
-       ret = -EDEADLK;
-       goto error;
-}
-
-/*
- * Dispose of a keyring list after the RCU grace period, freeing the unlinked
- * key
- */
-static void keyring_unlink_rcu_disposal(struct rcu_head *rcu)
-{
-       struct keyring_list *klist =
-               container_of(rcu, struct keyring_list, rcu);
-
-       if (klist->delkey != USHRT_MAX)
-               key_put(rcu_access_pointer(klist->keys[klist->delkey]));
-       kfree(klist);
+       return PTR_ERR(ctx.result) == -EAGAIN ? 0 : PTR_ERR(ctx.result);
 }
 
 /*
  * Preallocate memory so that a key can be linked into to a keyring.
  */
-int __key_link_begin(struct key *keyring, const struct key_type *type,
-                    const char *description, unsigned long *_prealloc)
+int __key_link_begin(struct key *keyring,
+                    const struct keyring_index_key *index_key,
+                    struct assoc_array_edit **_edit)
        __acquires(&keyring->sem)
        __acquires(&keyring_serialise_link_sem)
 {
-       struct keyring_list *klist, *nklist;
-       unsigned long prealloc;
-       unsigned max;
-       time_t lowest_lru;
-       size_t size;
-       int loop, lru, ret;
+       struct assoc_array_edit *edit;
+       int ret;
+
+       kenter("%d,%s,%s,",
+              keyring->serial, index_key->type->name, index_key->description);
 
-       kenter("%d,%s,%s,", key_serial(keyring), type->name, description);
+       BUG_ON(index_key->desc_len == 0);
 
        if (keyring->type != &key_type_keyring)
                return -ENOTDIR;
@@ -771,91 +1059,28 @@ int __key_link_begin(struct key *keyring, const struct key_type *type,
 
        /* serialise link/link calls to prevent parallel calls causing a cycle
         * when linking two keyring in opposite orders */
-       if (type == &key_type_keyring)
+       if (index_key->type == &key_type_keyring)
                down_write(&keyring_serialise_link_sem);
 
-       klist = rcu_dereference_locked_keyring(keyring);
-
-       /* see if there's a matching key we can displace */
-       lru = -1;
-       if (klist && klist->nkeys > 0) {
-               lowest_lru = TIME_T_MAX;
-               for (loop = klist->nkeys - 1; loop >= 0; loop--) {
-                       struct key *key = rcu_deref_link_locked(klist, loop,
-                                                               keyring);
-                       if (key->type == type &&
-                           strcmp(key->description, description) == 0) {
-                               /* Found a match - we'll replace the link with
-                                * one to the new key.  We record the slot
-                                * position.
-                                */
-                               klist->delkey = loop;
-                               prealloc = 0;
-                               goto done;
-                       }
-                       if (key->last_used_at < lowest_lru) {
-                               lowest_lru = key->last_used_at;
-                               lru = loop;
-                       }
-               }
-       }
-
-       /* If the keyring is full then do an LRU discard */
-       if (klist &&
-           klist->nkeys == klist->maxkeys &&
-           klist->maxkeys >= MAX_KEYRING_LINKS) {
-               kdebug("LRU discard %d\n", lru);
-               klist->delkey = lru;
-               prealloc = 0;
-               goto done;
-       }
-
        /* check that we aren't going to overrun the user's quota */
        ret = key_payload_reserve(keyring,
                                  keyring->datalen + KEYQUOTA_LINK_BYTES);
        if (ret < 0)
                goto error_sem;
 
-       if (klist && klist->nkeys < klist->maxkeys) {
-               /* there's sufficient slack space to append directly */
-               klist->delkey = klist->nkeys;
-               prealloc = KEY_LINK_FIXQUOTA;
-       } else {
-               /* grow the key list */
-               max = 4;
-               if (klist) {
-                       max += klist->maxkeys;
-                       if (max > MAX_KEYRING_LINKS)
-                               max = MAX_KEYRING_LINKS;
-                       BUG_ON(max <= klist->maxkeys);
-               }
-
-               size = sizeof(*klist) + sizeof(struct key *) * max;
-
-               ret = -ENOMEM;
-               nklist = kmalloc(size, GFP_KERNEL);
-               if (!nklist)
-                       goto error_quota;
-
-               nklist->maxkeys = max;
-               if (klist) {
-                       memcpy(nklist->keys, klist->keys,
-                              sizeof(struct key *) * klist->nkeys);
-                       nklist->delkey = klist->nkeys;
-                       nklist->nkeys = klist->nkeys + 1;
-                       klist->delkey = USHRT_MAX;
-               } else {
-                       nklist->nkeys = 1;
-                       nklist->delkey = 0;
-               }
-
-               /* add the key into the new space */
-               RCU_INIT_POINTER(nklist->keys[nklist->delkey], NULL);
-               prealloc = (unsigned long)nklist | KEY_LINK_FIXQUOTA;
+       /* Create an edit script that will insert/replace the key in the
+        * keyring tree.
+        */
+       edit = assoc_array_insert(&keyring->keys,
+                                 &keyring_assoc_array_ops,
+                                 index_key,
+                                 NULL);
+       if (IS_ERR(edit)) {
+               ret = PTR_ERR(edit);
+               goto error_quota;
        }
 
-done:
-       *_prealloc = prealloc;
+       *_edit = edit;
        kleave(" = 0");
        return 0;
 
@@ -864,7 +1089,7 @@ error_quota:
        key_payload_reserve(keyring,
                            keyring->datalen - KEYQUOTA_LINK_BYTES);
 error_sem:
-       if (type == &key_type_keyring)
+       if (index_key->type == &key_type_keyring)
                up_write(&keyring_serialise_link_sem);
 error_krsem:
        up_write(&keyring->sem);
@@ -895,60 +1120,12 @@ int __key_link_check_live_key(struct key *keyring, struct key *key)
  * holds at most one link to any given key of a particular type+description
  * combination.
  */
-void __key_link(struct key *keyring, struct key *key,
-               unsigned long *_prealloc)
+void __key_link(struct key *key, struct assoc_array_edit **_edit)
 {
-       struct keyring_list *klist, *nklist;
-       struct key *discard;
-
-       nklist = (struct keyring_list *)(*_prealloc & ~KEY_LINK_FIXQUOTA);
-       *_prealloc = 0;
-
-       kenter("%d,%d,%p", keyring->serial, key->serial, nklist);
-
-       klist = rcu_dereference_locked_keyring(keyring);
-
-       atomic_inc(&key->usage);
-       keyring->last_used_at = key->last_used_at =
-               current_kernel_time().tv_sec;
-
-       /* there's a matching key we can displace or an empty slot in a newly
-        * allocated list we can fill */
-       if (nklist) {
-               kdebug("reissue %hu/%hu/%hu",
-                      nklist->delkey, nklist->nkeys, nklist->maxkeys);
-
-               RCU_INIT_POINTER(nklist->keys[nklist->delkey], key);
-
-               rcu_assign_pointer(keyring->payload.subscriptions, nklist);
-
-               /* dispose of the old keyring list and, if there was one, the
-                * displaced key */
-               if (klist) {
-                       kdebug("dispose %hu/%hu/%hu",
-                              klist->delkey, klist->nkeys, klist->maxkeys);
-                       call_rcu(&klist->rcu, keyring_unlink_rcu_disposal);
-               }
-       } else if (klist->delkey < klist->nkeys) {
-               kdebug("replace %hu/%hu/%hu",
-                      klist->delkey, klist->nkeys, klist->maxkeys);
-
-               discard = rcu_dereference_protected(
-                       klist->keys[klist->delkey],
-                       rwsem_is_locked(&keyring->sem));
-               rcu_assign_pointer(klist->keys[klist->delkey], key);
-               /* The garbage collector will take care of RCU
-                * synchronisation */
-               key_put(discard);
-       } else {
-               /* there's sufficient slack space to append directly */
-               kdebug("append %hu/%hu/%hu",
-                      klist->delkey, klist->nkeys, klist->maxkeys);
-
-               RCU_INIT_POINTER(klist->keys[klist->delkey], key);
-               smp_wmb();
-               klist->nkeys++;
-       }
+       __key_get(key);
+       assoc_array_insert_set_object(*_edit, keyring_key_to_ptr(key));
+       assoc_array_apply_edit(*_edit);
+       *_edit = NULL;
 }
 
 /*
@@ -956,24 +1133,22 @@ void __key_link(struct key *keyring, struct key *key,
  *
  * Must be called with __key_link_begin() having being called.
  */
-void __key_link_end(struct key *keyring, struct key_type *type,
-                   unsigned long prealloc)
+void __key_link_end(struct key *keyring,
+                   const struct keyring_index_key *index_key,
+                   struct assoc_array_edit *edit)
        __releases(&keyring->sem)
        __releases(&keyring_serialise_link_sem)
 {
-       BUG_ON(type == NULL);
-       BUG_ON(type->name == NULL);
-       kenter("%d,%s,%lx", keyring->serial, type->name, prealloc);
+       BUG_ON(index_key->type == NULL);
+       kenter("%d,%s,", keyring->serial, index_key->type->name);
 
-       if (type == &key_type_keyring)
+       if (index_key->type == &key_type_keyring)
                up_write(&keyring_serialise_link_sem);
 
-       if (prealloc) {
-               if (prealloc & KEY_LINK_FIXQUOTA)
-                       key_payload_reserve(keyring,
-                                           keyring->datalen -
-                                           KEYQUOTA_LINK_BYTES);
-               kfree((struct keyring_list *)(prealloc & ~KEY_LINK_FIXQUOTA));
+       if (edit) {
+               key_payload_reserve(keyring,
+                                   keyring->datalen - KEYQUOTA_LINK_BYTES);
+               assoc_array_cancel_edit(edit);
        }
        up_write(&keyring->sem);
 }
@@ -1000,20 +1175,28 @@ void __key_link_end(struct key *keyring, struct key_type *type,
  */
 int key_link(struct key *keyring, struct key *key)
 {
-       unsigned long prealloc;
+       struct assoc_array_edit *edit;
        int ret;
 
+       kenter("{%d,%d}", keyring->serial, atomic_read(&keyring->usage));
+
        key_check(keyring);
        key_check(key);
 
-       ret = __key_link_begin(keyring, key->type, key->description, &prealloc);
+       if (test_bit(KEY_FLAG_TRUSTED_ONLY, &keyring->flags) &&
+           !test_bit(KEY_FLAG_TRUSTED, &key->flags))
+               return -EPERM;
+
+       ret = __key_link_begin(keyring, &key->index_key, &edit);
        if (ret == 0) {
+               kdebug("begun {%d,%d}", keyring->serial, atomic_read(&keyring->usage));
                ret = __key_link_check_live_key(keyring, key);
                if (ret == 0)
-                       __key_link(keyring, key, &prealloc);
-               __key_link_end(keyring, key->type, prealloc);
+                       __key_link(key, &edit);
+               __key_link_end(keyring, &key->index_key, edit);
        }
 
+       kleave(" = %d {%d,%d}", ret, keyring->serial, atomic_read(&keyring->usage));
        return ret;
 }
 EXPORT_SYMBOL(key_link);
@@ -1037,90 +1220,36 @@ EXPORT_SYMBOL(key_link);
  */
 int key_unlink(struct key *keyring, struct key *key)
 {
-       struct keyring_list *klist, *nklist;
-       int loop, ret;
+       struct assoc_array_edit *edit;
+       int ret;
 
        key_check(keyring);
        key_check(key);
 
-       ret = -ENOTDIR;
        if (keyring->type != &key_type_keyring)
-               goto error;
+               return -ENOTDIR;
 
        down_write(&keyring->sem);
 
-       klist = rcu_dereference_locked_keyring(keyring);
-       if (klist) {
-               /* search the keyring for the key */
-               for (loop = 0; loop < klist->nkeys; loop++)
-                       if (rcu_access_pointer(klist->keys[loop]) == key)
-                               goto key_is_present;
+       edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops,
+                                 &key->index_key);
+       if (IS_ERR(edit)) {
+               ret = PTR_ERR(edit);
+               goto error;
        }
-
-       up_write(&keyring->sem);
        ret = -ENOENT;
-       goto error;
-
-key_is_present:
-       /* we need to copy the key list for RCU purposes */
-       nklist = kmalloc(sizeof(*klist) +
-                        sizeof(struct key *) * klist->maxkeys,
-                        GFP_KERNEL);
-       if (!nklist)
-               goto nomem;
-       nklist->maxkeys = klist->maxkeys;
-       nklist->nkeys = klist->nkeys - 1;
-
-       if (loop > 0)
-               memcpy(&nklist->keys[0],
-                      &klist->keys[0],
-                      loop * sizeof(struct key *));
-
-       if (loop < nklist->nkeys)
-               memcpy(&nklist->keys[loop],
-                      &klist->keys[loop + 1],
-                      (nklist->nkeys - loop) * sizeof(struct key *));
-
-       /* adjust the user's quota */
-       key_payload_reserve(keyring,
-                           keyring->datalen - KEYQUOTA_LINK_BYTES);
-
-       rcu_assign_pointer(keyring->payload.subscriptions, nklist);
-
-       up_write(&keyring->sem);
-
-       /* schedule for later cleanup */
-       klist->delkey = loop;
-       call_rcu(&klist->rcu, keyring_unlink_rcu_disposal);
+       if (edit == NULL)
+               goto error;
 
+       assoc_array_apply_edit(edit);
        ret = 0;
 
 error:
-       return ret;
-nomem:
-       ret = -ENOMEM;
        up_write(&keyring->sem);
-       goto error;
+       return ret;
 }
 EXPORT_SYMBOL(key_unlink);
 
-/*
- * Dispose of a keyring list after the RCU grace period, releasing the keys it
- * links to.
- */
-static void keyring_clear_rcu_disposal(struct rcu_head *rcu)
-{
-       struct keyring_list *klist;
-       int loop;
-
-       klist = container_of(rcu, struct keyring_list, rcu);
-
-       for (loop = klist->nkeys - 1; loop >= 0; loop--)
-               key_put(rcu_access_pointer(klist->keys[loop]));
-
-       kfree(klist);
-}
-
 /**
  * keyring_clear - Clear a keyring
  * @keyring: The keyring to clear.
@@ -1131,33 +1260,25 @@ static void keyring_clear_rcu_disposal(struct rcu_head *rcu)
  */
 int keyring_clear(struct key *keyring)
 {
-       struct keyring_list *klist;
+       struct assoc_array_edit *edit;
        int ret;
 
-       ret = -ENOTDIR;
-       if (keyring->type == &key_type_keyring) {
-               /* detach the pointer block with the locks held */
-               down_write(&keyring->sem);
-
-               klist = rcu_dereference_locked_keyring(keyring);
-               if (klist) {
-                       /* adjust the quota */
-                       key_payload_reserve(keyring,
-                                           sizeof(struct keyring_list));
-
-                       rcu_assign_pointer(keyring->payload.subscriptions,
-                                          NULL);
-               }
-
-               up_write(&keyring->sem);
+       if (keyring->type != &key_type_keyring)
+               return -ENOTDIR;
 
-               /* free the keys after the locks have been dropped */
-               if (klist)
-                       call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
+       down_write(&keyring->sem);
 
+       edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops);
+       if (IS_ERR(edit)) {
+               ret = PTR_ERR(edit);
+       } else {
+               if (edit)
+                       assoc_array_apply_edit(edit);
+               key_payload_reserve(keyring, 0);
                ret = 0;
        }
 
+       up_write(&keyring->sem);
        return ret;
 }
 EXPORT_SYMBOL(keyring_clear);
@@ -1169,17 +1290,25 @@ EXPORT_SYMBOL(keyring_clear);
  */
 static void keyring_revoke(struct key *keyring)
 {
-       struct keyring_list *klist;
+       struct assoc_array_edit *edit;
 
-       klist = rcu_dereference_locked_keyring(keyring);
+       edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops);
+       if (!IS_ERR(edit)) {
+               if (edit)
+                       assoc_array_apply_edit(edit);
+               key_payload_reserve(keyring, 0);
+       }
+}
 
-       /* adjust the quota */
-       key_payload_reserve(keyring, 0);
+static bool gc_iterator(void *object, void *iterator_data)
+{
+       struct key *key = keyring_ptr_to_key(object);
+       time_t *limit = iterator_data;
 
-       if (klist) {
-               rcu_assign_pointer(keyring->payload.subscriptions, NULL);
-               call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
-       }
+       if (key_is_dead(key, *limit))
+               return false;
+       key_get(key);
+       return true;
 }
 
 /*
@@ -1192,88 +1321,12 @@ static void keyring_revoke(struct key *keyring)
  */
 void keyring_gc(struct key *keyring, time_t limit)
 {
-       struct keyring_list *klist, *new;
-       struct key *key;
-       int loop, keep, max;
-
        kenter("{%x,%s}", key_serial(keyring), keyring->description);
 
        down_write(&keyring->sem);
-
-       klist = rcu_dereference_locked_keyring(keyring);
-       if (!klist)
-               goto no_klist;
-
-       /* work out how many subscriptions we're keeping */
-       keep = 0;
-       for (loop = klist->nkeys - 1; loop >= 0; loop--)
-               if (!key_is_dead(rcu_deref_link_locked(klist, loop, keyring),
-                                limit))
-                       keep++;
-
-       if (keep == klist->nkeys)
-               goto just_return;
-
-       /* allocate a new keyring payload */
-       max = roundup(keep, 4);
-       new = kmalloc(sizeof(struct keyring_list) + max * sizeof(struct key *),
-                     GFP_KERNEL);
-       if (!new)
-               goto nomem;
-       new->maxkeys = max;
-       new->nkeys = 0;
-       new->delkey = 0;
-
-       /* install the live keys
-        * - must take care as expired keys may be updated back to life
-        */
-       keep = 0;
-       for (loop = klist->nkeys - 1; loop >= 0; loop--) {
-               key = rcu_deref_link_locked(klist, loop, keyring);
-               if (!key_is_dead(key, limit)) {
-                       if (keep >= max)
-                               goto discard_new;
-                       RCU_INIT_POINTER(new->keys[keep++], key_get(key));
-               }
-       }
-       new->nkeys = keep;
-
-       /* adjust the quota */
-       key_payload_reserve(keyring,
-                           sizeof(struct keyring_list) +
-                           KEYQUOTA_LINK_BYTES * keep);
-
-       if (keep == 0) {
-               rcu_assign_pointer(keyring->payload.subscriptions, NULL);
-               kfree(new);
-       } else {
-               rcu_assign_pointer(keyring->payload.subscriptions, new);
-       }
-
-       up_write(&keyring->sem);
-
-       call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
-       kleave(" [yes]");
-       return;
-
-discard_new:
-       new->nkeys = keep;
-       keyring_clear_rcu_disposal(&new->rcu);
+       assoc_array_gc(&keyring->keys, &keyring_assoc_array_ops,
+                      gc_iterator, &limit);
        up_write(&keyring->sem);
-       kleave(" [discard]");
-       return;
 
-just_return:
-       up_write(&keyring->sem);
-       kleave(" [no dead]");
-       return;
-
-no_klist:
-       up_write(&keyring->sem);
-       kleave(" [no_klist]");
-       return;
-
-nomem:
-       up_write(&keyring->sem);
-       kleave(" [oom]");
+       kleave("");
 }
diff --git a/security/keys/persistent.c b/security/keys/persistent.c
new file mode 100644 (file)
index 0000000..82f4957
--- /dev/null
@@ -0,0 +1,169 @@
+/* General persistent per-UID keyrings register
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/user_namespace.h>
+#include "internal.h"
+
+/* Timeout handed to key_set_timeout() each time a persistent keyring is
+ * successfully linked by key_get_persistent(); 3 * 24 * 3600 is three days
+ * expressed in seconds.
+ */
+unsigned persistent_keyring_expiry = 3 * 24 * 3600; /* Expire after 3 days of non-use */
+
+/*
+ * Create the persistent keyring register for the current user namespace.
+ *
+ * The register is a keyring named ".persistent_register", allocated with
+ * UID 0 / GID 0 and KEY_ALLOC_NOT_IN_QUOTA, and is stored in
+ * ns->persistent_keyring_register on success.
+ *
+ * Called with the namespace's sem locked for writing.
+ *
+ * Returns 0 on success or the negative error code carried by the pointer
+ * that keyring_alloc() returned.
+ */
+static int key_create_persistent_register(struct user_namespace *ns)
+{
+       /* Permissions: every possessor right except SETATTR, plus view and
+        * read for the owning user.
+        */
+       struct key *reg = keyring_alloc(".persistent_register",
+                                       KUIDT_INIT(0), KGIDT_INIT(0),
+                                       current_cred(),
+                                       ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                                        KEY_USR_VIEW | KEY_USR_READ),
+                                       KEY_ALLOC_NOT_IN_QUOTA, NULL);
+       if (IS_ERR(reg))
+               return PTR_ERR(reg);
+
+       ns->persistent_keyring_register = reg;
+       return 0;
+}
+
+/*
+ * Create the persistent keyring for the specified user.
+ *
+ * If the namespace has no register yet, one is created first.  Otherwise the
+ * register is searched again for @index_key and any keyring found is returned
+ * instead of allocating a new one (presumably to cope with another task
+ * having created it before the caller took the write lock - confirm against
+ * key_get_persistent()).
+ *
+ * Called with the namespace's sem locked for writing.
+ *
+ * Returns a key reference with the possession flag set, or an ERR_PTR()
+ * value on failure.
+ */
+static key_ref_t key_create_persistent(struct user_namespace *ns, kuid_t uid,
+                                      struct keyring_index_key *index_key)
+{
+       struct key *persistent;
+       key_ref_t reg_ref, persistent_ref;
+
+       if (!ns->persistent_keyring_register) {
+               /* No register, so there can be nothing to find in it */
+               long err = key_create_persistent_register(ns);
+               if (err < 0)
+                       return ERR_PTR(err);
+       } else {
+               /* Register exists: look for an already-present keyring */
+               reg_ref = make_key_ref(ns->persistent_keyring_register, true);
+               persistent_ref = find_key_to_update(reg_ref, index_key);
+               if (persistent_ref)
+                       return persistent_ref;
+       }
+
+       /* The register is passed as the last argument to keyring_alloc();
+        * NOTE(review): this looks like the destination the new keyring gets
+        * linked into - confirm against keyring_alloc().
+        */
+       persistent = keyring_alloc(index_key->description,
+                                  uid, INVALID_GID, current_cred(),
+                                  ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                                   KEY_USR_VIEW | KEY_USR_READ),
+                                  KEY_ALLOC_NOT_IN_QUOTA,
+                                  ns->persistent_keyring_register);
+       if (IS_ERR(persistent))
+               return ERR_CAST(persistent);
+
+       return make_key_ref(persistent, true);
+}
+
+/*
+ * Get the persistent keyring for a specific UID and link it to the nominated
+ * keyring.
+ *
+ * The keyring is looked up in the namespace's register under the description
+ * "_persistent.<uid>", where <uid> is @uid as seen from @ns.  If it is not
+ * found, it is created by key_create_persistent().  The caller's credentials
+ * must grant KEY_LINK on the keyring before it is linked into @dest_ref.
+ *
+ * On success the keyring's timeout is reset to persistent_keyring_expiry and
+ * its serial number is returned; otherwise a negative error code is returned.
+ */
+static long key_get_persistent(struct user_namespace *ns, kuid_t uid,
+                              key_ref_t dest_ref)
+{
+       struct keyring_index_key index_key;
+       struct key *persistent;
+       key_ref_t reg_ref, persistent_ref;
+       char buf[32];
+       long ret;
+
+       /* Look in the register if it exists */
+       index_key.type = &key_type_keyring;
+       index_key.description = buf;
+       /* sprintf() returns the number of characters written, which is
+        * recorded as the description length.
+        */
+       index_key.desc_len = sprintf(buf, "_persistent.%u", from_kuid(ns, uid));
+
+       if (ns->persistent_keyring_register) {
+               reg_ref = make_key_ref(ns->persistent_keyring_register, true);
+               down_read(&ns->persistent_keyring_register_sem);
+               persistent_ref = find_key_to_update(reg_ref, &index_key);
+               up_read(&ns->persistent_keyring_register_sem);
+
+               if (persistent_ref)
+                       goto found;
+       }
+
+       /* It wasn't in the register, so we'll need to create it.  We might
+        * also need to create the register.
+        */
+       down_write(&ns->persistent_keyring_register_sem);
+       persistent_ref = key_create_persistent(ns, uid, &index_key);
+       up_write(&ns->persistent_keyring_register_sem);
+       if (!IS_ERR(persistent_ref))
+               goto found;
+
+       return PTR_ERR(persistent_ref);
+
+found:
+       /* Both the lookup and the creation path arrive here holding a
+        * reference on the keyring, which is dropped again below.
+        */
+       ret = key_task_permission(persistent_ref, current_cred(), KEY_LINK);
+       if (ret == 0) {
+               persistent = key_ref_to_ptr(persistent_ref);
+               ret = key_link(key_ref_to_ptr(dest_ref), persistent);
+               if (ret == 0) {
+                       /* Only a successful link restarts the expiry timer */
+                       key_set_timeout(persistent, persistent_keyring_expiry);
+                       ret = persistent->serial;               
+               }
+       }
+
+       key_ref_put(persistent_ref);
+       return ret;
+}
+
+/*
+ * Handle a request to get the persistent keyring for a UID and link it into
+ * a destination keyring.
+ *
+ * _uid is interpreted in the caller's user namespace; (uid_t)-1 selects the
+ * caller's own UID.  destid names the keyring to link into; it is looked up
+ * with KEY_LOOKUP_CREATE and must grant the caller KEY_WRITE.
+ *
+ * A caller without CAP_SETUID in its user namespace may only name a UID that
+ * matches its own real or effective UID.
+ *
+ * Returns the value of key_get_persistent() (the keyring's serial number on
+ * success), or a negative error code: -EINVAL if _uid does not map into the
+ * namespace, -EPERM if the caller may not access that UID's keyring,
+ * -ENOTDIR if the destination is not a keyring, or the error from
+ * lookup_user_key().
+ */
+long keyctl_get_persistent(uid_t _uid, key_serial_t destid)
+{
+       struct user_namespace *ns = current_user_ns();
+       key_ref_t dest_ref;
+       kuid_t uid;
+       long ret;
+
+       /* -1 indicates the current user */
+       if (_uid == (uid_t)-1) {
+               uid = current_uid();
+       } else {
+               uid = make_kuid(ns, _uid);
+               if (!uid_valid(uid))
+                       return -EINVAL;
+
+               /* You can only see your own persistent cache if you're not
+                * sufficiently privileged.  "Your own" means the requested
+                * UID matches either the real or the effective UID; refuse
+                * only when it matches neither and CAP_SETUID is absent.
+                */
+               if (!uid_eq(uid, current_uid()) &&
+                   !uid_eq(uid, current_euid()) &&
+                   !ns_capable(ns, CAP_SETUID))
+                       return -EPERM;
+       }
+
+       /* There must be a destination keyring */
+       dest_ref = lookup_user_key(destid, KEY_LOOKUP_CREATE, KEY_WRITE);
+       if (IS_ERR(dest_ref))
+               return PTR_ERR(dest_ref);
+       if (key_ref_to_ptr(dest_ref)->type != &key_type_keyring) {
+               ret = -ENOTDIR;
+               goto out_put_dest;
+       }
+
+       ret = key_get_persistent(ns, uid, dest_ref);
+
+out_put_dest:
+       /* Drop the reference taken by lookup_user_key() */
+       key_ref_put(dest_ref);
+       return ret;
+}
index 217b6855e815cb851153fa08646d2bf145cee579..88e9a466940f642af60f61b407888155ba057be5 100644 (file)
@@ -182,7 +182,6 @@ static void proc_keys_stop(struct seq_file *p, void *v)
 
 static int proc_keys_show(struct seq_file *m, void *v)
 {
-       const struct cred *cred = current_cred();
        struct rb_node *_p = v;
        struct key *key = rb_entry(_p, struct key, serial_node);
        struct timespec now;
@@ -191,15 +190,23 @@ static int proc_keys_show(struct seq_file *m, void *v)
        char xbuf[12];
        int rc;
 
+       struct keyring_search_context ctx = {
+               .index_key.type         = key->type,
+               .index_key.description  = key->description,
+               .cred                   = current_cred(),
+               .match                  = lookup_user_key_possessed,
+               .match_data             = key,
+               .flags                  = (KEYRING_SEARCH_NO_STATE_CHECK |
+                                          KEYRING_SEARCH_LOOKUP_DIRECT),
+       };
+
        key_ref = make_key_ref(key, 0);
 
        /* determine if the key is possessed by this process (a test we can
         * skip if the key does not indicate the possessor can view it
         */
        if (key->perm & KEY_POS_VIEW) {
-               skey_ref = search_my_process_keyrings(key->type, key,
-                                                     lookup_user_key_possessed,
-                                                     true, cred);
+               skey_ref = search_my_process_keyrings(&ctx);
                if (!IS_ERR(skey_ref)) {
                        key_ref_put(skey_ref);
                        key_ref = make_key_ref(key, 1);
@@ -211,7 +218,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
         * - the caller holds a spinlock, and thus the RCU read lock, making our
         *   access to __current_cred() safe
         */
-       rc = key_task_permission(key_ref, cred, KEY_VIEW);
+       rc = key_task_permission(key_ref, ctx.cred, KEY_VIEW);
        if (rc < 0)
                return 0;
 
index 42defae1e161632e93b13b8194af1a30a09f2492..0cf8a130a267ca58fbc5599787c93b9913cfc576 100644 (file)
@@ -235,7 +235,7 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
                if (IS_ERR(keyring))
                        return PTR_ERR(keyring);
        } else {
-               atomic_inc(&keyring->usage);
+               __key_get(keyring);
        }
 
        /* install the keyring */
@@ -319,11 +319,7 @@ void key_fsgid_changed(struct task_struct *tsk)
  * In the case of a successful return, the possession attribute is set on the
  * returned key reference.
  */
-key_ref_t search_my_process_keyrings(struct key_type *type,
-                                    const void *description,
-                                    key_match_func_t match,
-                                    bool no_state_check,
-                                    const struct cred *cred)
+key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx)
 {
        key_ref_t key_ref, ret, err;
 
@@ -339,10 +335,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
        err = ERR_PTR(-EAGAIN);
 
        /* search the thread keyring first */
-       if (cred->thread_keyring) {
+       if (ctx->cred->thread_keyring) {
                key_ref = keyring_search_aux(
-                       make_key_ref(cred->thread_keyring, 1),
-                       cred, type, description, match, no_state_check);
+                       make_key_ref(ctx->cred->thread_keyring, 1), ctx);
                if (!IS_ERR(key_ref))
                        goto found;
 
@@ -358,10 +353,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
        }
 
        /* search the process keyring second */
-       if (cred->process_keyring) {
+       if (ctx->cred->process_keyring) {
                key_ref = keyring_search_aux(
-                       make_key_ref(cred->process_keyring, 1),
-                       cred, type, description, match, no_state_check);
+                       make_key_ref(ctx->cred->process_keyring, 1), ctx);
                if (!IS_ERR(key_ref))
                        goto found;
 
@@ -379,11 +373,11 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
        }
 
        /* search the session keyring */
-       if (cred->session_keyring) {
+       if (ctx->cred->session_keyring) {
                rcu_read_lock();
                key_ref = keyring_search_aux(
-                       make_key_ref(rcu_dereference(cred->session_keyring), 1),
-                       cred, type, description, match, no_state_check);
+                       make_key_ref(rcu_dereference(ctx->cred->session_keyring), 1),
+                       ctx);
                rcu_read_unlock();
 
                if (!IS_ERR(key_ref))
@@ -402,10 +396,10 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
                }
        }
        /* or search the user-session keyring */
-       else if (cred->user->session_keyring) {
+       else if (ctx->cred->user->session_keyring) {
                key_ref = keyring_search_aux(
-                       make_key_ref(cred->user->session_keyring, 1),
-                       cred, type, description, match, no_state_check);
+                       make_key_ref(ctx->cred->user->session_keyring, 1),
+                       ctx);
                if (!IS_ERR(key_ref))
                        goto found;
 
@@ -437,18 +431,14 @@ found:
  *
  * Return same as search_my_process_keyrings().
  */
-key_ref_t search_process_keyrings(struct key_type *type,
-                                 const void *description,
-                                 key_match_func_t match,
-                                 const struct cred *cred)
+key_ref_t search_process_keyrings(struct keyring_search_context *ctx)
 {
        struct request_key_auth *rka;
        key_ref_t key_ref, ret = ERR_PTR(-EACCES), err;
 
        might_sleep();
 
-       key_ref = search_my_process_keyrings(type, description, match,
-                                            false, cred);
+       key_ref = search_my_process_keyrings(ctx);
        if (!IS_ERR(key_ref))
                goto found;
        err = key_ref;
@@ -457,18 +447,21 @@ key_ref_t search_process_keyrings(struct key_type *type,
         * search the keyrings of the process mentioned there
         * - we don't permit access to request_key auth keys via this method
         */
-       if (cred->request_key_auth &&
-           cred == current_cred() &&
-           type != &key_type_request_key_auth
+       if (ctx->cred->request_key_auth &&
+           ctx->cred == current_cred() &&
+           ctx->index_key.type != &key_type_request_key_auth
            ) {
+               const struct cred *cred = ctx->cred;
+
                /* defend against the auth key being revoked */
                down_read(&cred->request_key_auth->sem);
 
-               if (key_validate(cred->request_key_auth) == 0) {
-                       rka = cred->request_key_auth->payload.data;
+               if (key_validate(ctx->cred->request_key_auth) == 0) {
+                       rka = ctx->cred->request_key_auth->payload.data;
 
-                       key_ref = search_process_keyrings(type, description,
-                                                         match, rka->cred);
+                       ctx->cred = rka->cred;
+                       key_ref = search_process_keyrings(ctx);
+                       ctx->cred = cred;
 
                        up_read(&cred->request_key_auth->sem);
 
@@ -522,19 +515,23 @@ int lookup_user_key_possessed(const struct key *key, const void *target)
 key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags,
                          key_perm_t perm)
 {
+       struct keyring_search_context ctx = {
+               .match  = lookup_user_key_possessed,
+               .flags  = (KEYRING_SEARCH_NO_STATE_CHECK |
+                          KEYRING_SEARCH_LOOKUP_DIRECT),
+       };
        struct request_key_auth *rka;
-       const struct cred *cred;
        struct key *key;
        key_ref_t key_ref, skey_ref;
        int ret;
 
 try_again:
-       cred = get_current_cred();
+       ctx.cred = get_current_cred();
        key_ref = ERR_PTR(-ENOKEY);
 
        switch (id) {
        case KEY_SPEC_THREAD_KEYRING:
-               if (!cred->thread_keyring) {
+               if (!ctx.cred->thread_keyring) {
                        if (!(lflags & KEY_LOOKUP_CREATE))
                                goto error;
 
@@ -546,13 +543,13 @@ try_again:
                        goto reget_creds;
                }
 
-               key = cred->thread_keyring;
-               atomic_inc(&key->usage);
+               key = ctx.cred->thread_keyring;
+               __key_get(key);
                key_ref = make_key_ref(key, 1);
                break;
 
        case KEY_SPEC_PROCESS_KEYRING:
-               if (!cred->process_keyring) {
+               if (!ctx.cred->process_keyring) {
                        if (!(lflags & KEY_LOOKUP_CREATE))
                                goto error;
 
@@ -564,13 +561,13 @@ try_again:
                        goto reget_creds;
                }
 
-               key = cred->process_keyring;
-               atomic_inc(&key->usage);
+               key = ctx.cred->process_keyring;
+               __key_get(key);
                key_ref = make_key_ref(key, 1);
                break;
 
        case KEY_SPEC_SESSION_KEYRING:
-               if (!cred->session_keyring) {
+               if (!ctx.cred->session_keyring) {
                        /* always install a session keyring upon access if one
                         * doesn't exist yet */
                        ret = install_user_keyrings();
@@ -580,13 +577,13 @@ try_again:
                                ret = join_session_keyring(NULL);
                        else
                                ret = install_session_keyring(
-                                       cred->user->session_keyring);
+                                       ctx.cred->user->session_keyring);
 
                        if (ret < 0)
                                goto error;
                        goto reget_creds;
-               } else if (cred->session_keyring ==
-                          cred->user->session_keyring &&
+               } else if (ctx.cred->session_keyring ==
+                          ctx.cred->user->session_keyring &&
                           lflags & KEY_LOOKUP_CREATE) {
                        ret = join_session_keyring(NULL);
                        if (ret < 0)
@@ -595,33 +592,33 @@ try_again:
                }
 
                rcu_read_lock();
-               key = rcu_dereference(cred->session_keyring);
-               atomic_inc(&key->usage);
+               key = rcu_dereference(ctx.cred->session_keyring);
+               __key_get(key);
                rcu_read_unlock();
                key_ref = make_key_ref(key, 1);
                break;
 
        case KEY_SPEC_USER_KEYRING:
-               if (!cred->user->uid_keyring) {
+               if (!ctx.cred->user->uid_keyring) {
                        ret = install_user_keyrings();
                        if (ret < 0)
                                goto error;
                }
 
-               key = cred->user->uid_keyring;
-               atomic_inc(&key->usage);
+               key = ctx.cred->user->uid_keyring;
+               __key_get(key);
                key_ref = make_key_ref(key, 1);
                break;
 
        case KEY_SPEC_USER_SESSION_KEYRING:
-               if (!cred->user->session_keyring) {
+               if (!ctx.cred->user->session_keyring) {
                        ret = install_user_keyrings();
                        if (ret < 0)
                                goto error;
                }
 
-               key = cred->user->session_keyring;
-               atomic_inc(&key->usage);
+               key = ctx.cred->user->session_keyring;
+               __key_get(key);
                key_ref = make_key_ref(key, 1);
                break;
 
@@ -631,29 +628,29 @@ try_again:
                goto error;
 
        case KEY_SPEC_REQKEY_AUTH_KEY:
-               key = cred->request_key_auth;
+               key = ctx.cred->request_key_auth;
                if (!key)
                        goto error;
 
-               atomic_inc(&key->usage);
+               __key_get(key);
                key_ref = make_key_ref(key, 1);
                break;
 
        case KEY_SPEC_REQUESTOR_KEYRING:
-               if (!cred->request_key_auth)
+               if (!ctx.cred->request_key_auth)
                        goto error;
 
-               down_read(&cred->request_key_auth->sem);
+               down_read(&ctx.cred->request_key_auth->sem);
                if (test_bit(KEY_FLAG_REVOKED,
-                            &cred->request_key_auth->flags)) {
+                            &ctx.cred->request_key_auth->flags)) {
                        key_ref = ERR_PTR(-EKEYREVOKED);
                        key = NULL;
                } else {
-                       rka = cred->request_key_auth->payload.data;
+                       rka = ctx.cred->request_key_auth->payload.data;
                        key = rka->dest_keyring;
-                       atomic_inc(&key->usage);
+                       __key_get(key);
                }
-               up_read(&cred->request_key_auth->sem);
+               up_read(&ctx.cred->request_key_auth->sem);
                if (!key)
                        goto error;
                key_ref = make_key_ref(key, 1);
@@ -673,9 +670,13 @@ try_again:
                key_ref = make_key_ref(key, 0);
 
                /* check to see if we possess the key */
-               skey_ref = search_process_keyrings(key->type, key,
-                                                  lookup_user_key_possessed,
-                                                  cred);
+               ctx.index_key.type              = key->type;
+               ctx.index_key.description       = key->description;
+               ctx.index_key.desc_len          = strlen(key->description);
+               ctx.match_data                  = key;
+               kdebug("check possessed");
+               skey_ref = search_process_keyrings(&ctx);
+               kdebug("possessed=%p", skey_ref);
 
                if (!IS_ERR(skey_ref)) {
                        key_put(key);
@@ -715,14 +716,14 @@ try_again:
                goto invalid_key;
 
        /* check the permissions */
-       ret = key_task_permission(key_ref, cred, perm);
+       ret = key_task_permission(key_ref, ctx.cred, perm);
        if (ret < 0)
                goto invalid_key;
 
        key->last_used_at = current_kernel_time().tv_sec;
 
 error:
-       put_cred(cred);
+       put_cred(ctx.cred);
        return key_ref;
 
 invalid_key:
@@ -733,7 +734,7 @@ invalid_key:
        /* if we attempted to install a keyring, then it may have caused new
         * creds to be installed */
 reget_creds:
-       put_cred(cred);
+       put_cred(ctx.cred);
        goto try_again;
 }
 
@@ -856,3 +857,13 @@ void key_change_session_keyring(struct callback_head *twork)
 
        commit_creds(new);
 }
+
+/*
+ * Make sure that root's user and user-session keyrings exist.
+ *
+ * Registered below as a late initcall; the result of install_user_keyrings()
+ * is passed straight back as the initcall's return value.
+ */
+static int __init init_root_keyring(void)
+{
+       return install_user_keyrings();
+}
+
+late_initcall(init_root_keyring);
index c411f9bb156b205751ae06983e85f547119a245f..df94827103d0c12001b970697ab7b6de16882283 100644 (file)
@@ -345,33 +345,34 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
  * May return a key that's already under construction instead if there was a
  * race between two thread calling request_key().
  */
-static int construct_alloc_key(struct key_type *type,
-                              const char *description,
+static int construct_alloc_key(struct keyring_search_context *ctx,
                               struct key *dest_keyring,
                               unsigned long flags,
                               struct key_user *user,
                               struct key **_key)
 {
-       const struct cred *cred = current_cred();
-       unsigned long prealloc;
+       struct assoc_array_edit *edit;
        struct key *key;
        key_perm_t perm;
        key_ref_t key_ref;
        int ret;
 
-       kenter("%s,%s,,,", type->name, description);
+       kenter("%s,%s,,,",
+              ctx->index_key.type->name, ctx->index_key.description);
 
        *_key = NULL;
        mutex_lock(&user->cons_lock);
 
        perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
        perm |= KEY_USR_VIEW;
-       if (type->read)
+       if (ctx->index_key.type->read)
                perm |= KEY_POS_READ;
-       if (type == &key_type_keyring || type->update)
+       if (ctx->index_key.type == &key_type_keyring ||
+           ctx->index_key.type->update)
                perm |= KEY_POS_WRITE;
 
-       key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred,
+       key = key_alloc(ctx->index_key.type, ctx->index_key.description,
+                       ctx->cred->fsuid, ctx->cred->fsgid, ctx->cred,
                        perm, flags);
        if (IS_ERR(key))
                goto alloc_failed;
@@ -379,8 +380,7 @@ static int construct_alloc_key(struct key_type *type,
        set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
 
        if (dest_keyring) {
-               ret = __key_link_begin(dest_keyring, type, description,
-                                      &prealloc);
+               ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit);
                if (ret < 0)
                        goto link_prealloc_failed;
        }
@@ -390,16 +390,16 @@ static int construct_alloc_key(struct key_type *type,
         * waited for locks */
        mutex_lock(&key_construction_mutex);
 
-       key_ref = search_process_keyrings(type, description, type->match, cred);
+       key_ref = search_process_keyrings(ctx);
        if (!IS_ERR(key_ref))
                goto key_already_present;
 
        if (dest_keyring)
-               __key_link(dest_keyring, key, &prealloc);
+               __key_link(key, &edit);
 
        mutex_unlock(&key_construction_mutex);
        if (dest_keyring)
-               __key_link_end(dest_keyring, type, prealloc);
+               __key_link_end(dest_keyring, &ctx->index_key, edit);
        mutex_unlock(&user->cons_lock);
        *_key = key;
        kleave(" = 0 [%d]", key_serial(key));
@@ -414,8 +414,8 @@ key_already_present:
        if (dest_keyring) {
                ret = __key_link_check_live_key(dest_keyring, key);
                if (ret == 0)
-                       __key_link(dest_keyring, key, &prealloc);
-               __key_link_end(dest_keyring, type, prealloc);
+                       __key_link(key, &edit);
+               __key_link_end(dest_keyring, &ctx->index_key, edit);
                if (ret < 0)
                        goto link_check_failed;
        }
@@ -444,8 +444,7 @@ alloc_failed:
 /*
  * Commence key construction.
  */
-static struct key *construct_key_and_link(struct key_type *type,
-                                         const char *description,
+static struct key *construct_key_and_link(struct keyring_search_context *ctx,
                                          const char *callout_info,
                                          size_t callout_len,
                                          void *aux,
@@ -464,8 +463,7 @@ static struct key *construct_key_and_link(struct key_type *type,
 
        construct_get_dest_keyring(&dest_keyring);
 
-       ret = construct_alloc_key(type, description, dest_keyring, flags, user,
-                                 &key);
+       ret = construct_alloc_key(ctx, dest_keyring, flags, user, &key);
        key_user_put(user);
 
        if (ret == 0) {
@@ -529,17 +527,24 @@ struct key *request_key_and_link(struct key_type *type,
                                 struct key *dest_keyring,
                                 unsigned long flags)
 {
-       const struct cred *cred = current_cred();
+       struct keyring_search_context ctx = {
+               .index_key.type         = type,
+               .index_key.description  = description,
+               .cred                   = current_cred(),
+               .match                  = type->match,
+               .match_data             = description,
+               .flags                  = KEYRING_SEARCH_LOOKUP_DIRECT,
+       };
        struct key *key;
        key_ref_t key_ref;
        int ret;
 
        kenter("%s,%s,%p,%zu,%p,%p,%lx",
-              type->name, description, callout_info, callout_len, aux,
-              dest_keyring, flags);
+              ctx.index_key.type->name, ctx.index_key.description,
+              callout_info, callout_len, aux, dest_keyring, flags);
 
        /* search all the process keyrings for a key */
-       key_ref = search_process_keyrings(type, description, type->match, cred);
+       key_ref = search_process_keyrings(&ctx);
 
        if (!IS_ERR(key_ref)) {
                key = key_ref_to_ptr(key_ref);
@@ -562,9 +567,8 @@ struct key *request_key_and_link(struct key_type *type,
                if (!callout_info)
                        goto error;
 
-               key = construct_key_and_link(type, description, callout_info,
-                                            callout_len, aux, dest_keyring,
-                                            flags);
+               key = construct_key_and_link(&ctx, callout_info, callout_len,
+                                            aux, dest_keyring, flags);
        }
 
 error:
index 85730d5a5a59a05c852b3d22c586778b117589fa..7495a93b4b9024dad78d526d17feb7d07f93016f 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <asm/uaccess.h>
 #include "internal.h"
+#include <keys/user-type.h>
 
 static int request_key_auth_instantiate(struct key *,
                                        struct key_preparsed_payload *);
@@ -221,33 +222,27 @@ error_alloc:
        return ERR_PTR(ret);
 }
 
-/*
- * See if an authorisation key is associated with a particular key.
- */
-static int key_get_instantiation_authkey_match(const struct key *key,
-                                              const void *_id)
-{
-       struct request_key_auth *rka = key->payload.data;
-       key_serial_t id = (key_serial_t)(unsigned long) _id;
-
-       return rka->target_key->serial == id;
-}
-
 /*
  * Search the current process's keyrings for the authorisation key for
  * instantiation of a key.
  */
 struct key *key_get_instantiation_authkey(key_serial_t target_id)
 {
-       const struct cred *cred = current_cred();
+       char description[16];
+       struct keyring_search_context ctx = {
+               .index_key.type         = &key_type_request_key_auth,
+               .index_key.description  = description,
+               .cred                   = current_cred(),
+               .match                  = user_match,
+               .match_data             = description,
+               .flags                  = KEYRING_SEARCH_LOOKUP_DIRECT,
+       };
        struct key *authkey;
        key_ref_t authkey_ref;
 
-       authkey_ref = search_process_keyrings(
-               &key_type_request_key_auth,
-               (void *) (unsigned long) target_id,
-               key_get_instantiation_authkey_match,
-               cred);
+       sprintf(description, "%x", target_id);
+
+       authkey_ref = search_process_keyrings(&ctx);
 
        if (IS_ERR(authkey_ref)) {
                authkey = ERR_CAST(authkey_ref);
index ee32d181764ab876fa2c6b0470c4a65f937cd031..8c0af08760c809b2923d04c5cc3b114c75e27b27 100644 (file)
@@ -61,5 +61,16 @@ ctl_table key_sysctls[] = {
                .extra1 = (void *) &zero,
                .extra2 = (void *) &max,
        },
+#ifdef CONFIG_PERSISTENT_KEYRINGS
+       {
+               .procname = "persistent_keyring_expiry",
+               .data = &persistent_keyring_expiry,
+               .maxlen = sizeof(unsigned),
+               .mode = 0644,
+               .proc_handler = proc_dointvec_minmax,
+               .extra1 = (void *) &zero,
+               .extra2 = (void *) &max,
+       },
+#endif
        { }
 };
index 55dc88939185812f70145427b96c991ed6d636e2..faa2caeb593f8524a059e79d58e09bf430a1f992 100644 (file)
@@ -25,14 +25,15 @@ static int logon_vet_description(const char *desc);
  * arbitrary blob of data as the payload
  */
 struct key_type key_type_user = {
-       .name           = "user",
-       .instantiate    = user_instantiate,
-       .update         = user_update,
-       .match          = user_match,
-       .revoke         = user_revoke,
-       .destroy        = user_destroy,
-       .describe       = user_describe,
-       .read           = user_read,
+       .name                   = "user",
+       .def_lookup_type        = KEYRING_SEARCH_LOOKUP_DIRECT,
+       .instantiate            = user_instantiate,
+       .update                 = user_update,
+       .match                  = user_match,
+       .revoke                 = user_revoke,
+       .destroy                = user_destroy,
+       .describe               = user_describe,
+       .read                   = user_read,
 };
 
 EXPORT_SYMBOL_GPL(key_type_user);
@@ -45,6 +46,7 @@ EXPORT_SYMBOL_GPL(key_type_user);
  */
 struct key_type key_type_logon = {
        .name                   = "logon",
+       .def_lookup_type        = KEYRING_SEARCH_LOOKUP_DIRECT,
        .instantiate            = user_instantiate,
        .update                 = user_update,
        .match                  = user_match,
index 4dc31f4f2700626cb951aed5e874f0ce18d8b064..15b6928592ef68aac565e3fc94daf4737b6adc54 100644 (file)
@@ -1340,22 +1340,17 @@ int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx)
        return security_ops->xfrm_policy_delete_security(ctx);
 }
 
-int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx)
+int security_xfrm_state_alloc(struct xfrm_state *x,
+                             struct xfrm_user_sec_ctx *sec_ctx)
 {
-       return security_ops->xfrm_state_alloc_security(x, sec_ctx, 0);
+       return security_ops->xfrm_state_alloc(x, sec_ctx);
 }
 EXPORT_SYMBOL(security_xfrm_state_alloc);
 
 int security_xfrm_state_alloc_acquire(struct xfrm_state *x,
                                      struct xfrm_sec_ctx *polsec, u32 secid)
 {
-       if (!polsec)
-               return 0;
-       /*
-        * We want the context to be taken from secid which is usually
-        * from the sock.
-        */
-       return security_ops->xfrm_state_alloc_security(x, NULL, secid);
+       return security_ops->xfrm_state_alloc_acquire(x, polsec, secid);
 }
 
 int security_xfrm_state_delete(struct xfrm_state *x)
index c540795fb3f2647619cb4705281872e93592e21e..794c3ca49eac92998caa17be71a4bdc472c2e9c8 100644 (file)
@@ -95,7 +95,9 @@
 #include "audit.h"
 #include "avc_ss.h"
 
-#define NUM_SEL_MNT_OPTS 5
+#define SB_TYPE_FMT "%s%s%s"
+#define SB_SUBTYPE(sb) (sb->s_subtype && sb->s_subtype[0])
+#define SB_TYPE_ARGS(sb) sb->s_type->name, SB_SUBTYPE(sb) ? "." : "", SB_SUBTYPE(sb) ? sb->s_subtype : ""
 
 extern struct security_operations *security_ops;
 
@@ -139,12 +141,28 @@ static struct kmem_cache *sel_inode_cache;
  * This function checks the SECMARK reference counter to see if any SECMARK
  * targets are currently configured, if the reference counter is greater than
  * zero SECMARK is considered to be enabled.  Returns true (1) if SECMARK is
- * enabled, false (0) if SECMARK is disabled.
+ * enabled, false (0) if SECMARK is disabled.  If the always_check_network
+ * policy capability is enabled, SECMARK is always considered enabled.
  *
  */
 static int selinux_secmark_enabled(void)
 {
-       return (atomic_read(&selinux_secmark_refcount) > 0);
+       return (selinux_policycap_alwaysnetwork || atomic_read(&selinux_secmark_refcount));
+}
+
+/**
+ * selinux_peerlbl_enabled - Check to see if peer labeling is currently enabled
+ *
+ * Description:
+ * This function checks if NetLabel or labeled IPSEC is enabled.  Returns true
+ * (1) if any are enabled or false (0) if neither are enabled.  If the
+ * always_check_network policy capability is enabled, peer labeling
+ * is always considered enabled.
+ *
+ */
+static int selinux_peerlbl_enabled(void)
+{
+       return (selinux_policycap_alwaysnetwork || netlbl_enabled() || selinux_xfrm_enabled());
 }
 
 /*
@@ -309,8 +327,11 @@ enum {
        Opt_defcontext = 3,
        Opt_rootcontext = 4,
        Opt_labelsupport = 5,
+       Opt_nextmntopt = 6,
 };
 
+#define NUM_SEL_MNT_OPTS       (Opt_nextmntopt - 1)
+
 static const match_table_t tokens = {
        {Opt_context, CONTEXT_STR "%s"},
        {Opt_fscontext, FSCONTEXT_STR "%s"},
@@ -355,6 +376,29 @@ static int may_context_mount_inode_relabel(u32 sid,
        return rc;
 }
 
+static int selinux_is_sblabel_mnt(struct super_block *sb)
+{
+       struct superblock_security_struct *sbsec = sb->s_security;
+
+       if (sbsec->behavior == SECURITY_FS_USE_XATTR ||
+           sbsec->behavior == SECURITY_FS_USE_TRANS ||
+           sbsec->behavior == SECURITY_FS_USE_TASK)
+               return 1;
+
+       /* Special handling for sysfs. Is genfs but also has setxattr handler*/
+       if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0)
+               return 1;
+
+       /*
+        * Special handling for rootfs. Is genfs but supports
+        * setting SELinux context on in-core inodes.
+        */
+       if (strncmp(sb->s_type->name, "rootfs", sizeof("rootfs")) == 0)
+               return 1;
+
+       return 0;
+}
+
 static int sb_finish_set_opts(struct super_block *sb)
 {
        struct superblock_security_struct *sbsec = sb->s_security;
@@ -369,8 +413,8 @@ static int sb_finish_set_opts(struct super_block *sb)
                   the first boot of the SELinux kernel before we have
                   assigned xattr values to the filesystem. */
                if (!root_inode->i_op->getxattr) {
-                       printk(KERN_WARNING "SELinux: (dev %s, type %s) has no "
-                              "xattr support\n", sb->s_id, sb->s_type->name);
+                       printk(KERN_WARNING "SELinux: (dev %s, type "SB_TYPE_FMT") has no "
+                              "xattr support\n", sb->s_id, SB_TYPE_ARGS(sb));
                        rc = -EOPNOTSUPP;
                        goto out;
                }
@@ -378,35 +422,27 @@ static int sb_finish_set_opts(struct super_block *sb)
                if (rc < 0 && rc != -ENODATA) {
                        if (rc == -EOPNOTSUPP)
                                printk(KERN_WARNING "SELinux: (dev %s, type "
-                                      "%s) has no security xattr handler\n",
-                                      sb->s_id, sb->s_type->name);
+                                      SB_TYPE_FMT") has no security xattr handler\n",
+                                      sb->s_id, SB_TYPE_ARGS(sb));
                        else
                                printk(KERN_WARNING "SELinux: (dev %s, type "
-                                      "%s) getxattr errno %d\n", sb->s_id,
-                                      sb->s_type->name, -rc);
+                                      SB_TYPE_FMT") getxattr errno %d\n", sb->s_id,
+                                      SB_TYPE_ARGS(sb), -rc);
                        goto out;
                }
        }
 
-       sbsec->flags |= (SE_SBINITIALIZED | SE_SBLABELSUPP);
-
        if (sbsec->behavior > ARRAY_SIZE(labeling_behaviors))
-               printk(KERN_ERR "SELinux: initialized (dev %s, type %s), unknown behavior\n",
-                      sb->s_id, sb->s_type->name);
+               printk(KERN_ERR "SELinux: initialized (dev %s, type "SB_TYPE_FMT"), unknown behavior\n",
+                      sb->s_id, SB_TYPE_ARGS(sb));
        else
-               printk(KERN_DEBUG "SELinux: initialized (dev %s, type %s), %s\n",
-                      sb->s_id, sb->s_type->name,
+               printk(KERN_DEBUG "SELinux: initialized (dev %s, type "SB_TYPE_FMT"), %s\n",
+                      sb->s_id, SB_TYPE_ARGS(sb),
                       labeling_behaviors[sbsec->behavior-1]);
 
-       if (sbsec->behavior == SECURITY_FS_USE_GENFS ||
-           sbsec->behavior == SECURITY_FS_USE_MNTPOINT ||
-           sbsec->behavior == SECURITY_FS_USE_NONE ||
-           sbsec->behavior > ARRAY_SIZE(labeling_behaviors))
-               sbsec->flags &= ~SE_SBLABELSUPP;
-
-       /* Special handling for sysfs. Is genfs but also has setxattr handler*/
-       if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0)
-               sbsec->flags |= SE_SBLABELSUPP;
+       sbsec->flags |= SE_SBINITIALIZED;
+       if (selinux_is_sblabel_mnt(sb))
+               sbsec->flags |= SBLABEL_MNT;
 
        /* Initialize the root inode. */
        rc = inode_doinit_with_dentry(root_inode, root);
@@ -460,15 +496,18 @@ static int selinux_get_mnt_opts(const struct super_block *sb,
        if (!ss_initialized)
                return -EINVAL;
 
+       /* make sure we always check enough bits to cover the mask */
+       BUILD_BUG_ON(SE_MNTMASK >= (1 << NUM_SEL_MNT_OPTS));
+
        tmp = sbsec->flags & SE_MNTMASK;
        /* count the number of mount options for this sb */
-       for (i = 0; i < 8; i++) {
+       for (i = 0; i < NUM_SEL_MNT_OPTS; i++) {
                if (tmp & 0x01)
                        opts->num_mnt_opts++;
                tmp >>= 1;
        }
        /* Check if the Label support flag is set */
-       if (sbsec->flags & SE_SBLABELSUPP)
+       if (sbsec->flags & SBLABEL_MNT)
                opts->num_mnt_opts++;
 
        opts->mnt_opts = kcalloc(opts->num_mnt_opts, sizeof(char *), GFP_ATOMIC);
@@ -515,9 +554,9 @@ static int selinux_get_mnt_opts(const struct super_block *sb,
                opts->mnt_opts[i] = context;
                opts->mnt_opts_flags[i++] = ROOTCONTEXT_MNT;
        }
-       if (sbsec->flags & SE_SBLABELSUPP) {
+       if (sbsec->flags & SBLABEL_MNT) {
                opts->mnt_opts[i] = NULL;
-               opts->mnt_opts_flags[i++] = SE_SBLABELSUPP;
+               opts->mnt_opts_flags[i++] = SBLABEL_MNT;
        }
 
        BUG_ON(i != opts->num_mnt_opts);
@@ -561,7 +600,6 @@ static int selinux_set_mnt_opts(struct super_block *sb,
        const struct cred *cred = current_cred();
        int rc = 0, i;
        struct superblock_security_struct *sbsec = sb->s_security;
-       const char *name = sb->s_type->name;
        struct inode *inode = sbsec->sb->s_root->d_inode;
        struct inode_security_struct *root_isec = inode->i_security;
        u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0;
@@ -614,14 +652,14 @@ static int selinux_set_mnt_opts(struct super_block *sb,
        for (i = 0; i < num_opts; i++) {
                u32 sid;
 
-               if (flags[i] == SE_SBLABELSUPP)
+               if (flags[i] == SBLABEL_MNT)
                        continue;
                rc = security_context_to_sid(mount_options[i],
                                             strlen(mount_options[i]), &sid);
                if (rc) {
                        printk(KERN_WARNING "SELinux: security_context_to_sid"
-                              "(%s) failed for (dev %s, type %s) errno=%d\n",
-                              mount_options[i], sb->s_id, name, rc);
+                              "(%s) failed for (dev %s, type "SB_TYPE_FMT") errno=%d\n",
+                              mount_options[i], sb->s_id, SB_TYPE_ARGS(sb), rc);
                        goto out;
                }
                switch (flags[i]) {
@@ -685,9 +723,7 @@ static int selinux_set_mnt_opts(struct super_block *sb,
                 * Determine the labeling behavior to use for this
                 * filesystem type.
                 */
-               rc = security_fs_use((sbsec->flags & SE_SBPROC) ?
-                                       "proc" : sb->s_type->name,
-                                       &sbsec->behavior, &sbsec->sid);
+               rc = security_fs_use(sb);
                if (rc) {
                        printk(KERN_WARNING
                                "%s: security_fs_use(%s) returned %d\n",
@@ -770,7 +806,8 @@ out:
 out_double_mount:
        rc = -EINVAL;
        printk(KERN_WARNING "SELinux: mount invalid.  Same superblock, different "
-              "security settings for (dev %s, type %s)\n", sb->s_id, name);
+              "security settings for (dev %s, type "SB_TYPE_FMT")\n", sb->s_id,
+              SB_TYPE_ARGS(sb));
        goto out;
 }
 
@@ -1037,7 +1074,7 @@ static void selinux_write_opts(struct seq_file *m,
                case DEFCONTEXT_MNT:
                        prefix = DEFCONTEXT_STR;
                        break;
-               case SE_SBLABELSUPP:
+               case SBLABEL_MNT:
                        seq_putc(m, ',');
                        seq_puts(m, LABELSUPP_STR);
                        continue;
@@ -1649,7 +1686,7 @@ static int may_create(struct inode *dir,
        if (rc)
                return rc;
 
-       if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) {
+       if (!newsid || !(sbsec->flags & SBLABEL_MNT)) {
                rc = security_transition_sid(sid, dsec->sid, tclass,
                                             &dentry->d_name, &newsid);
                if (rc)
@@ -2437,14 +2474,14 @@ static int selinux_sb_remount(struct super_block *sb, void *data)
                u32 sid;
                size_t len;
 
-               if (flags[i] == SE_SBLABELSUPP)
+               if (flags[i] == SBLABEL_MNT)
                        continue;
                len = strlen(mount_options[i]);
                rc = security_context_to_sid(mount_options[i], len, &sid);
                if (rc) {
                        printk(KERN_WARNING "SELinux: security_context_to_sid"
-                              "(%s) failed for (dev %s, type %s) errno=%d\n",
-                              mount_options[i], sb->s_id, sb->s_type->name, rc);
+                              "(%s) failed for (dev %s, type "SB_TYPE_FMT") errno=%d\n",
+                              mount_options[i], sb->s_id, SB_TYPE_ARGS(sb), rc);
                        goto out_free_opts;
                }
                rc = -EINVAL;
@@ -2482,8 +2519,8 @@ out_free_secdata:
        return rc;
 out_bad_option:
        printk(KERN_WARNING "SELinux: unable to change security options "
-              "during remount (dev %s, type=%s)\n", sb->s_id,
-              sb->s_type->name);
+              "during remount (dev %s, type "SB_TYPE_FMT")\n", sb->s_id,
+              SB_TYPE_ARGS(sb));
        goto out_free_opts;
 }
 
@@ -2606,7 +2643,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
        if ((sbsec->flags & SE_SBINITIALIZED) &&
            (sbsec->behavior == SECURITY_FS_USE_MNTPOINT))
                newsid = sbsec->mntpoint_sid;
-       else if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) {
+       else if (!newsid || !(sbsec->flags & SBLABEL_MNT)) {
                rc = security_transition_sid(sid, dsec->sid,
                                             inode_mode_to_security_class(inode->i_mode),
                                             qstr, &newsid);
@@ -2628,7 +2665,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
                isec->initialized = 1;
        }
 
-       if (!ss_initialized || !(sbsec->flags & SE_SBLABELSUPP))
+       if (!ss_initialized || !(sbsec->flags & SBLABEL_MNT))
                return -EOPNOTSUPP;
 
        if (name)
@@ -2830,7 +2867,7 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name,
                return selinux_inode_setotherxattr(dentry, name);
 
        sbsec = inode->i_sb->s_security;
-       if (!(sbsec->flags & SE_SBLABELSUPP))
+       if (!(sbsec->flags & SBLABEL_MNT))
                return -EOPNOTSUPP;
 
        if (!inode_owner_or_capable(inode))
@@ -3791,8 +3828,12 @@ static int selinux_skb_peerlbl_sid(struct sk_buff *skb, u16 family, u32 *sid)
        u32 nlbl_sid;
        u32 nlbl_type;
 
-       selinux_skb_xfrm_sid(skb, &xfrm_sid);
-       selinux_netlbl_skbuff_getsid(skb, family, &nlbl_type, &nlbl_sid);
+       err = selinux_skb_xfrm_sid(skb, &xfrm_sid);
+       if (unlikely(err))
+               return -EACCES;
+       err = selinux_netlbl_skbuff_getsid(skb, family, &nlbl_type, &nlbl_sid);
+       if (unlikely(err))
+               return -EACCES;
 
        err = security_net_peersid_resolve(nlbl_sid, nlbl_type, xfrm_sid, sid);
        if (unlikely(err)) {
@@ -4246,7 +4287,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
                return selinux_sock_rcv_skb_compat(sk, skb, family);
 
        secmark_active = selinux_secmark_enabled();
-       peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled();
+       peerlbl_active = selinux_peerlbl_enabled();
        if (!secmark_active && !peerlbl_active)
                return 0;
 
@@ -4628,7 +4669,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
 
        secmark_active = selinux_secmark_enabled();
        netlbl_active = netlbl_enabled();
-       peerlbl_active = netlbl_active || selinux_xfrm_enabled();
+       peerlbl_active = selinux_peerlbl_enabled();
        if (!secmark_active && !peerlbl_active)
                return NF_ACCEPT;
 
@@ -4780,7 +4821,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
                return NF_ACCEPT;
 #endif
        secmark_active = selinux_secmark_enabled();
-       peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled();
+       peerlbl_active = selinux_peerlbl_enabled();
        if (!secmark_active && !peerlbl_active)
                return NF_ACCEPT;
 
@@ -5784,7 +5825,8 @@ static struct security_operations selinux_ops = {
        .xfrm_policy_clone_security =   selinux_xfrm_policy_clone,
        .xfrm_policy_free_security =    selinux_xfrm_policy_free,
        .xfrm_policy_delete_security =  selinux_xfrm_policy_delete,
-       .xfrm_state_alloc_security =    selinux_xfrm_state_alloc,
+       .xfrm_state_alloc =             selinux_xfrm_state_alloc,
+       .xfrm_state_alloc_acquire =     selinux_xfrm_state_alloc_acquire,
        .xfrm_state_free_security =     selinux_xfrm_state_free,
        .xfrm_state_delete_security =   selinux_xfrm_state_delete,
        .xfrm_policy_lookup =           selinux_xfrm_policy_lookup,
index aa47bcabb5f65e728aadbaa39cdecfa55d20aa16..b1dfe104945078ead53647c247c46aa6134fac2e 100644 (file)
@@ -58,8 +58,8 @@ struct superblock_security_struct {
        u32 sid;                        /* SID of file system superblock */
        u32 def_sid;                    /* default SID for labeling */
        u32 mntpoint_sid;               /* SECURITY_FS_USE_MNTPOINT context for files */
-       unsigned int behavior;          /* labeling behavior */
-       unsigned char flags;            /* which mount options were specified */
+       unsigned short behavior;        /* labeling behavior */
+       unsigned short flags;           /* which mount options were specified */
        struct mutex lock;
        struct list_head isec_head;
        spinlock_t isec_lock;
index 8fd8e18ea34019c863d91ba88268b8c4018f3410..fe341ae370049b39ac2012d665a64dd4dc9af198 100644 (file)
 /* Mask for just the mount related flags */
 #define SE_MNTMASK     0x0f
 /* Super block security struct flags for mount options */
+/* BE CAREFUL, these need to be the low order bits for selinux_get_mnt_opts */
 #define CONTEXT_MNT    0x01
 #define FSCONTEXT_MNT  0x02
 #define ROOTCONTEXT_MNT        0x04
 #define DEFCONTEXT_MNT 0x08
+#define SBLABEL_MNT    0x10
 /* Non-mount related flags */
-#define SE_SBINITIALIZED       0x10
-#define SE_SBPROC              0x20
-#define SE_SBLABELSUPP 0x40
+#define SE_SBINITIALIZED       0x0100
+#define SE_SBPROC              0x0200
 
 #define CONTEXT_STR    "context="
 #define FSCONTEXT_STR  "fscontext="
@@ -68,12 +69,15 @@ extern int selinux_enabled;
 enum {
        POLICYDB_CAPABILITY_NETPEER,
        POLICYDB_CAPABILITY_OPENPERM,
+       POLICYDB_CAPABILITY_REDHAT1,
+       POLICYDB_CAPABILITY_ALWAYSNETWORK,
        __POLICYDB_CAPABILITY_MAX
 };
 #define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1)
 
 extern int selinux_policycap_netpeer;
 extern int selinux_policycap_openperm;
+extern int selinux_policycap_alwaysnetwork;
 
 /*
  * type_datum properties
@@ -172,8 +176,7 @@ int security_get_allow_unknown(void);
 #define SECURITY_FS_USE_NATIVE         7 /* use native label support */
 #define SECURITY_FS_USE_MAX            7 /* Highest SECURITY_FS_USE_XXX */
 
-int security_fs_use(const char *fstype, unsigned int *behavior,
-       u32 *sid);
+int security_fs_use(struct super_block *sb);
 
 int security_genfs_sid(const char *fstype, char *name, u16 sclass,
        u32 *sid);
index 6713f04e30ba8810415f88f7ed6e78cb5685f6f4..0dec76c64cf53853d0eea6aac983db307c8636b8 100644 (file)
 #include <net/flow.h>
 
 int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp,
-                             struct xfrm_user_sec_ctx *sec_ctx);
+                             struct xfrm_user_sec_ctx *uctx);
 int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx,
                              struct xfrm_sec_ctx **new_ctxp);
 void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx);
 int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx);
 int selinux_xfrm_state_alloc(struct xfrm_state *x,
-       struct xfrm_user_sec_ctx *sec_ctx, u32 secid);
+                            struct xfrm_user_sec_ctx *uctx);
+int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x,
+                                    struct xfrm_sec_ctx *polsec, u32 secid);
 void selinux_xfrm_state_free(struct xfrm_state *x);
 int selinux_xfrm_state_delete(struct xfrm_state *x);
 int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir);
 int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x,
-                       struct xfrm_policy *xp, const struct flowi *fl);
-
-/*
- * Extract the security blob from the sock (it's actually on the socket)
- */
-static inline struct inode_security_struct *get_sock_isec(struct sock *sk)
-{
-       if (!sk->sk_socket)
-               return NULL;
-
-       return SOCK_INODE(sk->sk_socket)->i_security;
-}
+                                     struct xfrm_policy *xp,
+                                     const struct flowi *fl);
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 extern atomic_t selinux_xfrm_refcount;
@@ -42,10 +34,10 @@ static inline int selinux_xfrm_enabled(void)
        return (atomic_read(&selinux_xfrm_refcount) > 0);
 }
 
-int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb,
-                       struct common_audit_data *ad);
-int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
-                       struct common_audit_data *ad, u8 proto);
+int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb,
+                             struct common_audit_data *ad);
+int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb,
+                               struct common_audit_data *ad, u8 proto);
 int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall);
 
 static inline void selinux_xfrm_notify_policyload(void)
@@ -64,19 +56,21 @@ static inline int selinux_xfrm_enabled(void)
        return 0;
 }
 
-static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
-                       struct common_audit_data *ad)
+static inline int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb,
+                                           struct common_audit_data *ad)
 {
        return 0;
 }
 
-static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
-                       struct common_audit_data *ad, u8 proto)
+static inline int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb,
+                                             struct common_audit_data *ad,
+                                             u8 proto)
 {
        return 0;
 }
 
-static inline int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall)
+static inline int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid,
+                                             int ckall)
 {
        *sid = SECSID_NULL;
        return 0;
@@ -87,10 +81,9 @@ static inline void selinux_xfrm_notify_policyload(void)
 }
 #endif
 
-static inline void selinux_skb_xfrm_sid(struct sk_buff *skb, u32 *sid)
+static inline int selinux_skb_xfrm_sid(struct sk_buff *skb, u32 *sid)
 {
-       int err = selinux_xfrm_decode_session(skb, sid, 0);
-       BUG_ON(err);
+       return selinux_xfrm_decode_session(skb, sid, 0);
 }
 
 #endif /* _SELINUX_XFRM_H_ */
index da4b8b2332802c9624f2f7f49ea8d622f96e180a..6235d052338b2e63b838711ed09c7ba1b04c67c6 100644 (file)
@@ -442,8 +442,7 @@ int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr)
            sksec->nlbl_state != NLBL_CONNLABELED)
                return 0;
 
-       local_bh_disable();
-       bh_lock_sock_nested(sk);
+       lock_sock(sk);
 
        /* connected sockets are allowed to disconnect when the address family
         * is set to AF_UNSPEC, if that is what is happening we want to reset
@@ -464,7 +463,6 @@ int selinux_netlbl_socket_connect(struct sock *sk, struct sockaddr *addr)
                sksec->nlbl_state = NLBL_CONNLABELED;
 
 socket_connect_return:
-       bh_unlock_sock(sk);
-       local_bh_enable();
+       release_sock(sk);
        return rc;
 }
index c5454c0477c346e4d814f5ff209feba86e5b86ad..03a72c32afd738ccad5c188bbe853202c32f53f6 100644 (file)
@@ -166,6 +166,7 @@ static void sel_netnode_insert(struct sel_netnode *node)
                break;
        default:
                BUG();
+               return;
        }
 
        /* we need to impose a limit on the growth of the hash table so check
@@ -225,6 +226,7 @@ static int sel_netnode_sid_slow(void *addr, u16 family, u32 *sid)
                break;
        default:
                BUG();
+               ret = -EINVAL;
        }
        if (ret != 0)
                goto out;
index ff427733c2903cab275a05da0887478850e1e374..5122affe06a8840e193150d62bd9b2f996fe67fe 100644 (file)
@@ -44,7 +44,9 @@
 /* Policy capability filenames */
 static char *policycap_names[] = {
        "network_peer_controls",
-       "open_perms"
+       "open_perms",
+       "redhat1",
+       "always_check_network"
 };
 
 unsigned int selinux_checkreqprot = CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE;
index 30f119b1d1ec36a95dc456c52b5b0ac1a6868514..820313a04d49bf4c4a8bc0f04ea01514ff184a64 100644 (file)
@@ -213,7 +213,12 @@ netlbl_import_failure:
 }
 #endif /* CONFIG_NETLABEL */
 
-int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2)
+/*
+ * Check to see if all the bits set in e2 are also set in e1. Optionally,
+ * if last_e2bit is non-zero, the highest set bit in e2 cannot exceed
+ * last_e2bit.
+ */
+int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2, u32 last_e2bit)
 {
        struct ebitmap_node *n1, *n2;
        int i;
@@ -223,14 +228,25 @@ int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2)
 
        n1 = e1->node;
        n2 = e2->node;
+
        while (n1 && n2 && (n1->startbit <= n2->startbit)) {
                if (n1->startbit < n2->startbit) {
                        n1 = n1->next;
                        continue;
                }
-               for (i = 0; i < EBITMAP_UNIT_NUMS; i++) {
+               for (i = EBITMAP_UNIT_NUMS - 1; (i >= 0) && !n2->maps[i]; )
+                       i--;    /* Skip trailing NULL map entries */
+               if (last_e2bit && (i >= 0)) {
+                       u32 lastsetbit = n2->startbit + i * EBITMAP_UNIT_SIZE +
+                                        __fls(n2->maps[i]);
+                       if (lastsetbit > last_e2bit)
+                               return 0;
+               }
+
+               while (i >= 0) {
                        if ((n1->maps[i] & n2->maps[i]) != n2->maps[i])
                                return 0;
+                       i--;
                }
 
                n1 = n1->next;
index 922f8afa89dd5837e2617daaf793db0e40ad009e..712c8a7b8e8b879d3835b5ee3650b66baa46e106 100644 (file)
 
 #include <net/netlabel.h>
 
-#define EBITMAP_UNIT_NUMS      ((32 - sizeof(void *) - sizeof(u32))    \
+#ifdef CONFIG_64BIT
+#define        EBITMAP_NODE_SIZE       64
+#else
+#define        EBITMAP_NODE_SIZE       32
+#endif
+
+#define EBITMAP_UNIT_NUMS      ((EBITMAP_NODE_SIZE-sizeof(void *)-sizeof(u32))\
                                        / sizeof(unsigned long))
 #define EBITMAP_UNIT_SIZE      BITS_PER_LONG
 #define EBITMAP_SIZE           (EBITMAP_UNIT_NUMS * EBITMAP_UNIT_SIZE)
@@ -117,7 +123,7 @@ static inline void ebitmap_node_clr_bit(struct ebitmap_node *n,
 
 int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2);
 int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src);
-int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2);
+int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2, u32 last_e2bit);
 int ebitmap_get_bit(struct ebitmap *e, unsigned long bit);
 int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value);
 void ebitmap_destroy(struct ebitmap *e);
index 40de8d3f208ecf95db162f4ae355d0d53ba99265..c85bc1ec040c0c58f93772004361cbcd04861575 100644 (file)
@@ -160,8 +160,6 @@ void mls_sid_to_context(struct context *context,
 int mls_level_isvalid(struct policydb *p, struct mls_level *l)
 {
        struct level_datum *levdatum;
-       struct ebitmap_node *node;
-       int i;
 
        if (!l->sens || l->sens > p->p_levels.nprim)
                return 0;
@@ -170,19 +168,13 @@ int mls_level_isvalid(struct policydb *p, struct mls_level *l)
        if (!levdatum)
                return 0;
 
-       ebitmap_for_each_positive_bit(&l->cat, node, i) {
-               if (i > p->p_cats.nprim)
-                       return 0;
-               if (!ebitmap_get_bit(&levdatum->level->cat, i)) {
-                       /*
-                        * Category may not be associated with
-                        * sensitivity.
-                        */
-                       return 0;
-               }
-       }
-
-       return 1;
+       /*
+        * Return 1 iff all the bits set in l->cat are also set in
+        * levdatum->level->cat and no bit in l->cat is larger than
+        * p->p_cats.nprim.
+        */
+       return ebitmap_contains(&levdatum->level->cat, &l->cat,
+                               p->p_cats.nprim);
 }
 
 int mls_range_isvalid(struct policydb *p, struct mls_range *r)
index 03bed52a80526abfbda766a33859595cc1d8bfa5..e93648774137c601f5ec90ce14a03983655ce36d 100644 (file)
@@ -35,7 +35,7 @@ static inline int mls_level_eq(struct mls_level *l1, struct mls_level *l2)
 static inline int mls_level_dom(struct mls_level *l1, struct mls_level *l2)
 {
        return ((l1->sens >= l2->sens) &&
-               ebitmap_contains(&l1->cat, &l2->cat));
+               ebitmap_contains(&l1->cat, &l2->cat, 0));
 }
 
 #define mls_level_incomp(l1, l2) \
index c8adde3aff8fdbe93fb2f867e55f71b9879685a5..f6195ebde3c94eef0cdf1cf92933246069b25059 100644 (file)
@@ -3203,9 +3203,8 @@ static int range_write_helper(void *key, void *data, void *ptr)
 
 static int range_write(struct policydb *p, void *fp)
 {
-       size_t nel;
        __le32 buf[1];
-       int rc;
+       int rc, nel;
        struct policy_data pd;
 
        pd.p = p;
index b4feecc3fe0110d10bbdc183c369a03ab8495a6c..ee470a0b5c27fdad95a59b258792b6182435b999 100644 (file)
@@ -72,6 +72,7 @@
 
 int selinux_policycap_netpeer;
 int selinux_policycap_openperm;
+int selinux_policycap_alwaysnetwork;
 
 static DEFINE_RWLOCK(policy_rwlock);
 
@@ -1812,6 +1813,8 @@ static void security_load_policycaps(void)
                                                  POLICYDB_CAPABILITY_NETPEER);
        selinux_policycap_openperm = ebitmap_get_bit(&policydb.policycaps,
                                                  POLICYDB_CAPABILITY_OPENPERM);
+       selinux_policycap_alwaysnetwork = ebitmap_get_bit(&policydb.policycaps,
+                                                 POLICYDB_CAPABILITY_ALWAYSNETWORK);
 }
 
 static int security_preserve_bools(struct policydb *p);
@@ -2323,43 +2326,82 @@ out:
 
 /**
  * security_fs_use - Determine how to handle labeling for a filesystem.
- * @fstype: filesystem type
- * @behavior: labeling behavior
- * @sid: SID for filesystem (superblock)
+ * @sb: superblock in question
+ *
+ * Stores the chosen labeling behavior and SID in sb->s_security.
  */
-int security_fs_use(
-       const char *fstype,
-       unsigned int *behavior,
-       u32 *sid)
+int security_fs_use(struct super_block *sb)
 {
        int rc = 0;
        struct ocontext *c;
+       struct superblock_security_struct *sbsec = sb->s_security;
+       const char *fstype = sb->s_type->name;
+       const char *subtype = (sb->s_subtype && sb->s_subtype[0]) ? sb->s_subtype : NULL;
+       struct ocontext *base = NULL;
 
        read_lock(&policy_rwlock);
 
-       c = policydb.ocontexts[OCON_FSUSE];
-       while (c) {
-               if (strcmp(fstype, c->u.name) == 0)
+       for (c = policydb.ocontexts[OCON_FSUSE]; c; c = c->next) {
+               char *sub;
+               int baselen;
+
+               baselen = strlen(fstype);
+
+               /* if base does not match, this is not the one */
+               if (strncmp(fstype, c->u.name, baselen))
+                       continue;
+
+               /* skip past the base in this entry */
+               sub = c->u.name + baselen;
+
+               /*
+                * if there is no subtype, only an entry that is exactly the
+                * base is the one; a longer name merely shares the prefix
+                */
+               if (!subtype) {
+                       if (sub[0] == '\0')
+                               break;
+                       continue;
+               }
+
+               /* entry is only a base. save it. keep looking for subtype */
+               if (sub[0] == '\0') {
+                       base = c;
+                       continue;
+               }
+
+               /* entry is not followed by a subtype, so it is not a match */
+               if (sub[0] != '.')
+                       continue;
+
+               /* whew, we found a subtype of this fstype */
+               sub++; /* move past '.' */
+
+               /* exact match of fstype AND subtype */
+               if (!strcmp(subtype, sub))
                        break;
-               c = c->next;
        }
 
+       /* in case we had found an fstype match but no subtype match */
+       if (!c)
+               c = base;
+
        if (c) {
-               *behavior = c->v.behavior;
+               sbsec->behavior = c->v.behavior;
                if (!c->sid[0]) {
                        rc = sidtab_context_to_sid(&sidtab, &c->context[0],
                                                   &c->sid[0]);
                        if (rc)
                                goto out;
                }
-               *sid = c->sid[0];
+               sbsec->sid = c->sid[0];
        } else {
-               rc = security_genfs_sid(fstype, "/", SECCLASS_DIR, sid);
+               rc = security_genfs_sid(fstype, "/", SECCLASS_DIR, &sbsec->sid);
                if (rc) {
-                       *behavior = SECURITY_FS_USE_NONE;
+                       sbsec->behavior = SECURITY_FS_USE_NONE;
                        rc = 0;
                } else {
-                       *behavior = SECURITY_FS_USE_GENFS;
+                       sbsec->behavior = SECURITY_FS_USE_GENFS;
                }
        }
 
index d030818862146732ebe30c8cc3f266d485ef0677..a91d205ec0c6094cc9a0fecb5d427d4d24b1ed9a 100644 (file)
@@ -56,7 +56,7 @@
 atomic_t selinux_xfrm_refcount = ATOMIC_INIT(0);
 
 /*
- * Returns true if an LSM/SELinux context
+ * Returns true if the context is an LSM/SELinux context.
  */
 static inline int selinux_authorizable_ctx(struct xfrm_sec_ctx *ctx)
 {
@@ -66,7 +66,7 @@ static inline int selinux_authorizable_ctx(struct xfrm_sec_ctx *ctx)
 }
 
 /*
- * Returns true if the xfrm contains a security blob for SELinux
+ * Returns true if the xfrm contains a security blob for SELinux.
  */
 static inline int selinux_authorizable_xfrm(struct xfrm_state *x)
 {
@@ -74,48 +74,111 @@ static inline int selinux_authorizable_xfrm(struct xfrm_state *x)
 }
 
 /*
- * LSM hook implementation that authorizes that a flow can use
- * a xfrm policy rule.
+ * Allocates a xfrm_sec_ctx and populates it using the supplied
+ * xfrm_user_sec_ctx security context.
  */
-int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir)
+static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp,
+                                  struct xfrm_user_sec_ctx *uctx)
 {
        int rc;
-       u32 sel_sid;
+       const struct task_security_struct *tsec = current_security();
+       struct xfrm_sec_ctx *ctx = NULL;
+       u32 str_len;
 
-       /* Context sid is either set to label or ANY_ASSOC */
-       if (ctx) {
-               if (!selinux_authorizable_ctx(ctx))
-                       return -EINVAL;
-
-               sel_sid = ctx->ctx_sid;
-       } else
-               /*
-                * All flows should be treated as polmatch'ing an
-                * otherwise applicable "non-labeled" policy. This
-                * would prevent inadvertent "leaks".
-                */
-               return 0;
+       if (ctxp == NULL || uctx == NULL ||
+           uctx->ctx_doi != XFRM_SC_DOI_LSM ||
+           uctx->ctx_alg != XFRM_SC_ALG_SELINUX)
+               return -EINVAL;
 
-       rc = avc_has_perm(fl_secid, sel_sid, SECCLASS_ASSOCIATION,
-                         ASSOCIATION__POLMATCH,
-                         NULL);
+       str_len = uctx->ctx_len;
+       if (str_len >= PAGE_SIZE)
+               return -ENOMEM;
 
-       if (rc == -EACCES)
-               return -ESRCH;
+       ctx = kmalloc(sizeof(*ctx) + str_len + 1, GFP_KERNEL);
+       if (!ctx)
+               return -ENOMEM;
 
+       ctx->ctx_doi = XFRM_SC_DOI_LSM;
+       ctx->ctx_alg = XFRM_SC_ALG_SELINUX;
+       ctx->ctx_len = str_len;
+       memcpy(ctx->ctx_str, &uctx[1], str_len);
+       ctx->ctx_str[str_len] = '\0';
+       rc = security_context_to_sid(ctx->ctx_str, str_len, &ctx->ctx_sid);
+       if (rc)
+               goto err;
+
+       rc = avc_has_perm(tsec->sid, ctx->ctx_sid,
+                         SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, NULL);
+       if (rc)
+               goto err;
+
+       *ctxp = ctx;
+       atomic_inc(&selinux_xfrm_refcount);
+       return 0;
+
+err:
+       kfree(ctx);
        return rc;
 }
 
+/*
+ * Free the xfrm_sec_ctx structure.
+ */
+static void selinux_xfrm_free(struct xfrm_sec_ctx *ctx)
+{
+       if (!ctx)
+               return;
+
+       atomic_dec(&selinux_xfrm_refcount);
+       kfree(ctx);
+}
+
+/*
+ * Authorize the deletion of a labeled SA or policy rule.
+ */
+static int selinux_xfrm_delete(struct xfrm_sec_ctx *ctx)
+{
+       const struct task_security_struct *tsec = current_security();
+
+       if (!ctx)
+               return 0;
+
+       return avc_has_perm(tsec->sid, ctx->ctx_sid,
+                           SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT,
+                           NULL);
+}
+
+/*
+ * LSM hook implementation that authorizes that a flow can use a xfrm policy
+ * rule.
+ */
+int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir)
+{
+       int rc;
+
+       /* All flows should be treated as polmatch'ing an otherwise applicable
+        * "non-labeled" policy. This would prevent inadvertent "leaks". */
+       if (!ctx)
+               return 0;
+
+       /* Context sid is either set to label or ANY_ASSOC */
+       if (!selinux_authorizable_ctx(ctx))
+               return -EINVAL;
+
+       rc = avc_has_perm(fl_secid, ctx->ctx_sid,
+                         SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, NULL);
+       return (rc == -EACCES ? -ESRCH : rc);
+}
+
 /*
  * LSM hook implementation that authorizes that a state matches
  * the given policy, flow combo.
  */
-
-int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp,
-                       const struct flowi *fl)
+int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x,
+                                     struct xfrm_policy *xp,
+                                     const struct flowi *fl)
 {
        u32 state_sid;
-       int rc;
 
        if (!xp->security)
                if (x->security)
@@ -138,187 +201,80 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *
        if (fl->flowi_secid != state_sid)
                return 0;
 
-       rc = avc_has_perm(fl->flowi_secid, state_sid, SECCLASS_ASSOCIATION,
-                         ASSOCIATION__SENDTO,
-                         NULL)? 0:1;
-
-       /*
-        * We don't need a separate SA Vs. policy polmatch check
-        * since the SA is now of the same label as the flow and
-        * a flow Vs. policy polmatch check had already happened
-        * in selinux_xfrm_policy_lookup() above.
-        */
-
-       return rc;
+       /* We don't need a separate SA Vs. policy polmatch check since the SA
+        * is now of the same label as the flow and a flow Vs. policy polmatch
+        * check had already happened in selinux_xfrm_policy_lookup() above. */
+       return (avc_has_perm(fl->flowi_secid, state_sid,
+                           SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO,
+                           NULL) ? 0 : 1);
 }
 
 /*
  * LSM hook implementation that checks and/or returns the xfrm sid for the
  * incoming packet.
  */
-
 int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall)
 {
+       u32 sid_session = SECSID_NULL;
        struct sec_path *sp;
 
-       *sid = SECSID_NULL;
-
        if (skb == NULL)
-               return 0;
+               goto out;
 
        sp = skb->sp;
        if (sp) {
-               int i, sid_set = 0;
+               int i;
 
-               for (i = sp->len-1; i >= 0; i--) {
+               for (i = sp->len - 1; i >= 0; i--) {
                        struct xfrm_state *x = sp->xvec[i];
                        if (selinux_authorizable_xfrm(x)) {
                                struct xfrm_sec_ctx *ctx = x->security;
 
-                               if (!sid_set) {
-                                       *sid = ctx->ctx_sid;
-                                       sid_set = 1;
-
+                               if (sid_session == SECSID_NULL) {
+                                       sid_session = ctx->ctx_sid;
                                        if (!ckall)
-                                               break;
-                               } else if (*sid != ctx->ctx_sid)
+                                               goto out;
+                               } else if (sid_session != ctx->ctx_sid) {
+                                       *sid = SECSID_NULL;
                                        return -EINVAL;
+                               }
                        }
                }
        }
 
-       return 0;
-}
-
-/*
- * Security blob allocation for xfrm_policy and xfrm_state
- * CTX does not have a meaningful value on input
- */
-static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp,
-       struct xfrm_user_sec_ctx *uctx, u32 sid)
-{
-       int rc = 0;
-       const struct task_security_struct *tsec = current_security();
-       struct xfrm_sec_ctx *ctx = NULL;
-       char *ctx_str = NULL;
-       u32 str_len;
-
-       BUG_ON(uctx && sid);
-
-       if (!uctx)
-               goto not_from_user;
-
-       if (uctx->ctx_alg != XFRM_SC_ALG_SELINUX)
-               return -EINVAL;
-
-       str_len = uctx->ctx_len;
-       if (str_len >= PAGE_SIZE)
-               return -ENOMEM;
-
-       *ctxp = ctx = kmalloc(sizeof(*ctx) +
-                             str_len + 1,
-                             GFP_KERNEL);
-
-       if (!ctx)
-               return -ENOMEM;
-
-       ctx->ctx_doi = uctx->ctx_doi;
-       ctx->ctx_len = str_len;
-       ctx->ctx_alg = uctx->ctx_alg;
-
-       memcpy(ctx->ctx_str,
-              uctx+1,
-              str_len);
-       ctx->ctx_str[str_len] = 0;
-       rc = security_context_to_sid(ctx->ctx_str,
-                                    str_len,
-                                    &ctx->ctx_sid);
-
-       if (rc)
-               goto out;
-
-       /*
-        * Does the subject have permission to set security context?
-        */
-       rc = avc_has_perm(tsec->sid, ctx->ctx_sid,
-                         SECCLASS_ASSOCIATION,
-                         ASSOCIATION__SETCONTEXT, NULL);
-       if (rc)
-               goto out;
-
-       return rc;
-
-not_from_user:
-       rc = security_sid_to_context(sid, &ctx_str, &str_len);
-       if (rc)
-               goto out;
-
-       *ctxp = ctx = kmalloc(sizeof(*ctx) +
-                             str_len,
-                             GFP_ATOMIC);
-
-       if (!ctx) {
-               rc = -ENOMEM;
-               goto out;
-       }
-
-       ctx->ctx_doi = XFRM_SC_DOI_LSM;
-       ctx->ctx_alg = XFRM_SC_ALG_SELINUX;
-       ctx->ctx_sid = sid;
-       ctx->ctx_len = str_len;
-       memcpy(ctx->ctx_str,
-              ctx_str,
-              str_len);
-
-       goto out2;
-
 out:
-       *ctxp = NULL;
-       kfree(ctx);
-out2:
-       kfree(ctx_str);
-       return rc;
+       *sid = sid_session;
+       return 0;
 }
 
 /*
- * LSM hook implementation that allocs and transfers uctx spec to
- * xfrm_policy.
+ * LSM hook implementation that allocs and transfers uctx spec to xfrm_policy.
  */
 int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp,
                              struct xfrm_user_sec_ctx *uctx)
 {
-       int err;
-
-       BUG_ON(!uctx);
-
-       err = selinux_xfrm_sec_ctx_alloc(ctxp, uctx, 0);
-       if (err == 0)
-               atomic_inc(&selinux_xfrm_refcount);
-
-       return err;
+       return selinux_xfrm_alloc_user(ctxp, uctx);
 }
 
-
 /*
- * LSM hook implementation that copies security data structure from old to
- * new for policy cloning.
+ * LSM hook implementation that copies security data structure from old to new
+ * for policy cloning.
  */
 int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx,
                              struct xfrm_sec_ctx **new_ctxp)
 {
        struct xfrm_sec_ctx *new_ctx;
 
-       if (old_ctx) {
-               new_ctx = kmalloc(sizeof(*old_ctx) + old_ctx->ctx_len,
-                                 GFP_ATOMIC);
-               if (!new_ctx)
-                       return -ENOMEM;
+       if (!old_ctx)
+               return 0;
+
+       new_ctx = kmemdup(old_ctx, sizeof(*old_ctx) + old_ctx->ctx_len,
+                         GFP_ATOMIC);
+       if (!new_ctx)
+               return -ENOMEM;
+       atomic_inc(&selinux_xfrm_refcount);
+       *new_ctxp = new_ctx;
 
-               memcpy(new_ctx, old_ctx, sizeof(*new_ctx));
-               memcpy(new_ctx->ctx_str, old_ctx->ctx_str, new_ctx->ctx_len);
-               atomic_inc(&selinux_xfrm_refcount);
-               *new_ctxp = new_ctx;
-       }
        return 0;
 }
 
@@ -327,8 +283,7 @@ int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx,
  */
 void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx)
 {
-       atomic_dec(&selinux_xfrm_refcount);
-       kfree(ctx);
+       selinux_xfrm_free(ctx);
 }
 
 /*
@@ -336,31 +291,61 @@ void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx)
  */
 int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx)
 {
-       const struct task_security_struct *tsec = current_security();
-
-       if (!ctx)
-               return 0;
+       return selinux_xfrm_delete(ctx);
+}
 
-       return avc_has_perm(tsec->sid, ctx->ctx_sid,
-                           SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT,
-                           NULL);
+/*
+ * LSM hook implementation that allocates a xfrm_sec_ctx, populates it using
+ * the supplied security context, and assigns it to the xfrm_state.
+ */
+int selinux_xfrm_state_alloc(struct xfrm_state *x,
+                            struct xfrm_user_sec_ctx *uctx)
+{
+       return selinux_xfrm_alloc_user(&x->security, uctx);
 }
 
 /*
- * LSM hook implementation that allocs and transfers sec_ctx spec to
- * xfrm_state.
+ * LSM hook implementation that allocates a xfrm_sec_ctx, populates it from
+ * the context string of the given secid, and assigns it to the xfrm_state.
+ * Returns 0 without allocating when polsec is NULL, -EINVAL when secid is 0,
+ * and -ENOMEM when the allocation fails.
  */
-int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx,
-               u32 secid)
+int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x,
+                                    struct xfrm_sec_ctx *polsec, u32 secid)
 {
-       int err;
+       int rc;
+       struct xfrm_sec_ctx *ctx;
+       char *ctx_str = NULL;
+       u32 str_len;
+
+       if (!polsec)
+               return 0;
 
-       BUG_ON(!x);
+       if (secid == 0)
+               return -EINVAL;
 
-       err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx, secid);
-       if (err == 0)
-               atomic_inc(&selinux_xfrm_refcount);
-       return err;
+       rc = security_sid_to_context(secid, &ctx_str, &str_len);
+       if (rc)
+               return rc;
+
+       ctx = kmalloc(sizeof(*ctx) + str_len, GFP_ATOMIC);
+       if (!ctx) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       ctx->ctx_doi = XFRM_SC_DOI_LSM;
+       ctx->ctx_alg = XFRM_SC_ALG_SELINUX;
+       ctx->ctx_sid = secid;
+       ctx->ctx_len = str_len;
+       memcpy(ctx->ctx_str, ctx_str, str_len);
+
+       x->security = ctx;
+       atomic_inc(&selinux_xfrm_refcount);
+out:
+       /* ctx_str was only needed to fill ctx; free it on every path */
+       kfree(ctx_str);
+       return rc;
 }
 
 /*
@@ -368,24 +347,15 @@ int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uct
  */
 void selinux_xfrm_state_free(struct xfrm_state *x)
 {
-       atomic_dec(&selinux_xfrm_refcount);
-       kfree(x->security);
+       selinux_xfrm_free(x->security);
 }
 
- /*
 * LSM hook implementation that authorizes deletion of labeled SAs.
 */
+/*
+ * LSM hook implementation that authorizes deletion of labeled SAs.
+ */
 int selinux_xfrm_state_delete(struct xfrm_state *x)
 {
-       const struct task_security_struct *tsec = current_security();
-       struct xfrm_sec_ctx *ctx = x->security;
-
-       if (!ctx)
-               return 0;
-
-       return avc_has_perm(tsec->sid, ctx->ctx_sid,
-                           SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT,
-                           NULL);
+       return selinux_xfrm_delete(x->security);
 }
 
 /*
@@ -395,14 +365,12 @@ int selinux_xfrm_state_delete(struct xfrm_state *x)
  * we need to check for unlabelled access since this may not have
  * gone thru the IPSec process.
  */
-int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
-                               struct common_audit_data *ad)
+int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb,
+                             struct common_audit_data *ad)
 {
-       int i, rc = 0;
-       struct sec_path *sp;
-       u32 sel_sid = SECINITSID_UNLABELED;
-
-       sp = skb->sp;
+       int i;
+       struct sec_path *sp = skb->sp;
+       u32 peer_sid = SECINITSID_UNLABELED;
 
        if (sp) {
                for (i = 0; i < sp->len; i++) {
@@ -410,23 +378,17 @@ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
 
                        if (x && selinux_authorizable_xfrm(x)) {
                                struct xfrm_sec_ctx *ctx = x->security;
-                               sel_sid = ctx->ctx_sid;
+                               peer_sid = ctx->ctx_sid;
                                break;
                        }
                }
        }
 
-       /*
-        * This check even when there's no association involved is
-        * intended, according to Trent Jaeger, to make sure a
-        * process can't engage in non-ipsec communication unless
-        * explicitly allowed by policy.
-        */
-
-       rc = avc_has_perm(isec_sid, sel_sid, SECCLASS_ASSOCIATION,
-                         ASSOCIATION__RECVFROM, ad);
-
-       return rc;
+       /* This check even when there's no association involved is intended,
+        * according to Trent Jaeger, to make sure a process can't engage in
+        * non-IPsec communication unless explicitly allowed by policy. */
+       return avc_has_perm(sk_sid, peer_sid,
+                           SECCLASS_ASSOCIATION, ASSOCIATION__RECVFROM, ad);
 }
 
 /*
@@ -436,49 +398,38 @@ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
  * If we do have a authorizable security association, then it has already been
  * checked in the selinux_xfrm_state_pol_flow_match hook above.
  */
-int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
-                                       struct common_audit_data *ad, u8 proto)
+int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb,
+                               struct common_audit_data *ad, u8 proto)
 {
        struct dst_entry *dst;
-       int rc = 0;
-
-       dst = skb_dst(skb);
-
-       if (dst) {
-               struct dst_entry *dst_test;
-
-               for (dst_test = dst; dst_test != NULL;
-                    dst_test = dst_test->child) {
-                       struct xfrm_state *x = dst_test->xfrm;
-
-                       if (x && selinux_authorizable_xfrm(x))
-                               goto out;
-               }
-       }
 
        switch (proto) {
        case IPPROTO_AH:
        case IPPROTO_ESP:
        case IPPROTO_COMP:
-               /*
-                * We should have already seen this packet once before
-                * it underwent xfrm(s). No need to subject it to the
-                * unlabeled check.
-                */
-               goto out;
+               /* We should have already seen this packet once before it
+                * underwent xfrm(s). No need to subject it to the unlabeled
+                * check. */
+               return 0;
        default:
                break;
        }
 
-       /*
-        * This check even when there's no association involved is
-        * intended, according to Trent Jaeger, to make sure a
-        * process can't engage in non-ipsec communication unless
-        * explicitly allowed by policy.
-        */
+       dst = skb_dst(skb);
+       if (dst) {
+               struct dst_entry *iter;
 
-       rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION,
-                         ASSOCIATION__SENDTO, ad);
-out:
-       return rc;
+               for (iter = dst; iter != NULL; iter = iter->child) {
+                       struct xfrm_state *x = iter->xfrm;
+
+                       if (x && selinux_authorizable_xfrm(x))
+                               return 0;
+               }
+       }
+
+       /* This check even when there's no association involved is intended,
+        * according to Trent Jaeger, to make sure a process can't engage in
+        * non-IPsec communication unless explicitly allowed by policy. */
+       return avc_has_perm(sk_sid, SECINITSID_UNLABELED,
+                           SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, ad);
 }