GNU Linux-libre 4.14.266-gnu1
drivers/staging/lustre/lustre/ptlrpc/sec.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2012, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * lustre/ptlrpc/sec.c
33  *
34  * Author: Eric Mei <ericm@clusterfs.com>
35  */
36
37 #define DEBUG_SUBSYSTEM S_SEC
38
39 #include <linux/libcfs/libcfs.h>
40 #include <linux/crypto.h>
41 #include <linux/cred.h>
42 #include <linux/key.h>
43 #include <linux/sched/task.h>
44
45 #include <obd.h>
46 #include <obd_class.h>
47 #include <obd_support.h>
48 #include <lustre_net.h>
49 #include <lustre_import.h>
50 #include <lustre_dlm.h>
51 #include <lustre_sec.h>
52
53 #include "ptlrpc_internal.h"
54
55 /***********************************************
56  * policy registers
57  ***********************************************/
58
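/*
 * Table of registered security policies, indexed by policy number
 * (SPTLRPC_POLICY_*); protected by policy_lock.
 */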
59 static rwlock_t policy_lock;
60 static struct ptlrpc_sec_policy *policies[SPTLRPC_POLICY_MAX] = {
61         NULL,
62 };
63
64 int sptlrpc_register_policy(struct ptlrpc_sec_policy *policy)
65 {
66         __u16 number = policy->sp_policy;
67
68         LASSERT(policy->sp_name);
69         LASSERT(policy->sp_cops);
70         LASSERT(policy->sp_sops);
71
72         if (number >= SPTLRPC_POLICY_MAX)
73                 return -EINVAL;
74
75         write_lock(&policy_lock);
76         if (unlikely(policies[number])) {
77                 write_unlock(&policy_lock);
78                 return -EALREADY;
79         }
80         policies[number] = policy;
81         write_unlock(&policy_lock);
82
83         CDEBUG(D_SEC, "%s: registered\n", policy->sp_name);
84         return 0;
85 }
86 EXPORT_SYMBOL(sptlrpc_register_policy);
87
88 int sptlrpc_unregister_policy(struct ptlrpc_sec_policy *policy)
89 {
90         __u16 number = policy->sp_policy;
91
92         LASSERT(number < SPTLRPC_POLICY_MAX);
93
94         write_lock(&policy_lock);
95         if (unlikely(!policies[number])) {
96                 write_unlock(&policy_lock);
97                 CERROR("%s: already unregistered\n", policy->sp_name);
98                 return -EINVAL;
99         }
100
101         LASSERT(policies[number] == policy);
102         policies[number] = NULL;
103         write_unlock(&policy_lock);
104
105         CDEBUG(D_SEC, "%s: unregistered\n", policy->sp_name);
106         return 0;
107 }
108 EXPORT_SYMBOL(sptlrpc_unregister_policy);
109
110 static
111 struct ptlrpc_sec_policy *sptlrpc_wireflavor2policy(__u32 flavor)
112 {
113         static DEFINE_MUTEX(load_mutex);
114         static atomic_t loaded = ATOMIC_INIT(0);
115         struct ptlrpc_sec_policy *policy;
116         __u16 number = SPTLRPC_FLVR_POLICY(flavor);
117         __u16 flag = 0;
118
119         if (number >= SPTLRPC_POLICY_MAX)
120                 return NULL;
121
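        /*
         * Look up the policy under the read lock. If the GSS policy is not
         * registered yet, try loading the ptlrpc_gss module once and retry;
         * for any other missing policy, give up immediately.
         */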
122         while (1) {
123                 read_lock(&policy_lock);
124                 policy = policies[number];
125                 if (policy && !try_module_get(policy->sp_owner))
126                         policy = NULL;
127                 if (!policy)
128                         flag = atomic_read(&loaded);
129                 read_unlock(&policy_lock);
130
131                 if (policy || flag != 0 ||
132                     number != SPTLRPC_POLICY_GSS)
133                         break;
134
135                 /* try to load gss module, once */
136                 mutex_lock(&load_mutex);
137                 if (atomic_read(&loaded) == 0) {
138                         if (request_module("ptlrpc_gss") == 0)
139                                 CDEBUG(D_SEC,
140                                        "module ptlrpc_gss loaded on demand\n");
141                         else
142                                 CERROR("Unable to load module ptlrpc_gss\n");
143
144                         atomic_set(&loaded, 1);
145                 }
146                 mutex_unlock(&load_mutex);
147         }
148
149         return policy;
150 }
151
152 __u32 sptlrpc_name2flavor_base(const char *name)
153 {
154         if (!strcmp(name, "null"))
155                 return SPTLRPC_FLVR_NULL;
156         if (!strcmp(name, "plain"))
157                 return SPTLRPC_FLVR_PLAIN;
158         if (!strcmp(name, "krb5n"))
159                 return SPTLRPC_FLVR_KRB5N;
160         if (!strcmp(name, "krb5a"))
161                 return SPTLRPC_FLVR_KRB5A;
162         if (!strcmp(name, "krb5i"))
163                 return SPTLRPC_FLVR_KRB5I;
164         if (!strcmp(name, "krb5p"))
165                 return SPTLRPC_FLVR_KRB5P;
166
167         return SPTLRPC_FLVR_INVALID;
168 }
169 EXPORT_SYMBOL(sptlrpc_name2flavor_base);
170
171 const char *sptlrpc_flavor2name_base(__u32 flvr)
172 {
173         __u32   base = SPTLRPC_FLVR_BASE(flvr);
174
175         if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL))
176                 return "null";
177         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_PLAIN))
178                 return "plain";
179         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5N))
180                 return "krb5n";
181         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5A))
182                 return "krb5a";
183         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5I))
184                 return "krb5i";
185         else if (base == SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_KRB5P))
186                 return "krb5p";
187
188         CERROR("invalid wire flavor 0x%x\n", flvr);
189         return "invalid";
190 }
191 EXPORT_SYMBOL(sptlrpc_flavor2name_base);
192
193 char *sptlrpc_flavor2name_bulk(struct sptlrpc_flavor *sf,
194                                char *buf, int bufsize)
195 {
196         if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN)
197                 snprintf(buf, bufsize, "hash:%s",
198                          sptlrpc_get_hash_name(sf->u_bulk.hash.hash_alg));
199         else
200                 snprintf(buf, bufsize, "%s",
201                          sptlrpc_flavor2name_base(sf->sf_rpc));
202
203         buf[bufsize - 1] = '\0';
204         return buf;
205 }
206 EXPORT_SYMBOL(sptlrpc_flavor2name_bulk);
207
208 char *sptlrpc_flavor2name(struct sptlrpc_flavor *sf, char *buf, int bufsize)
209 {
210         strlcpy(buf, sptlrpc_flavor2name_base(sf->sf_rpc), bufsize);
211
212         /*
213          * currently we don't support customized bulk specification for
214          * flavors other than plain
215          */
216         if (SPTLRPC_FLVR_POLICY(sf->sf_rpc) == SPTLRPC_POLICY_PLAIN) {
217                 char bspec[16];
218
219                 bspec[0] = '-';
220                 sptlrpc_flavor2name_bulk(sf, &bspec[1], sizeof(bspec) - 1);
221                 strlcat(buf, bspec, bufsize);
222         }
223
224         return buf;
225 }
226 EXPORT_SYMBOL(sptlrpc_flavor2name);
227
228 static char *sptlrpc_secflags2str(__u32 flags, char *buf, int bufsize)
229 {
230         buf[0] = '\0';
231
232         if (flags & PTLRPC_SEC_FL_REVERSE)
233                 strlcat(buf, "reverse,", bufsize);
234         if (flags & PTLRPC_SEC_FL_ROOTONLY)
235                 strlcat(buf, "rootonly,", bufsize);
236         if (flags & PTLRPC_SEC_FL_UDESC)
237                 strlcat(buf, "udesc,", bufsize);
238         if (flags & PTLRPC_SEC_FL_BULK)
239                 strlcat(buf, "bulk,", bufsize);
240         if (buf[0] == '\0')
241                 strlcat(buf, "-,", bufsize);
242
243         return buf;
244 }
245
246 /**************************************************
247  * client context APIs
248  **************************************************/
249
250 static
251 struct ptlrpc_cli_ctx *get_my_ctx(struct ptlrpc_sec *sec)
252 {
253         struct vfs_cred vcred;
254         int create = 1, remove_dead = 1;
255
256         LASSERT(sec);
257         LASSERT(sec->ps_policy->sp_cops->lookup_ctx);
258
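        /*
         * Reverse and root-only flavors always use root credentials; for a
         * reverse sec we additionally never create new contexts or reap
         * dead ones from this path.
         */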
259         if (sec->ps_flvr.sf_flags & (PTLRPC_SEC_FL_REVERSE |
260                                      PTLRPC_SEC_FL_ROOTONLY)) {
261                 vcred.vc_uid = 0;
262                 vcred.vc_gid = 0;
263                 if (sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_REVERSE) {
264                         create = 0;
265                         remove_dead = 0;
266                 }
267         } else {
268                 vcred.vc_uid = from_kuid(&init_user_ns, current_uid());
269                 vcred.vc_gid = from_kgid(&init_user_ns, current_gid());
270         }
271
272         return sec->ps_policy->sp_cops->lookup_ctx(sec, &vcred,
273                                                    create, remove_dead);
274 }
275
276 struct ptlrpc_cli_ctx *sptlrpc_cli_ctx_get(struct ptlrpc_cli_ctx *ctx)
277 {
278         atomic_inc(&ctx->cc_refcount);
279         return ctx;
280 }
281 EXPORT_SYMBOL(sptlrpc_cli_ctx_get);
282
283 void sptlrpc_cli_ctx_put(struct ptlrpc_cli_ctx *ctx, int sync)
284 {
285         struct ptlrpc_sec *sec = ctx->cc_sec;
286
287         LASSERT(sec);
288         LASSERT_ATOMIC_POS(&ctx->cc_refcount);
289
290         if (!atomic_dec_and_test(&ctx->cc_refcount))
291                 return;
292
293         sec->ps_policy->sp_cops->release_ctx(sec, ctx, sync);
294 }
295 EXPORT_SYMBOL(sptlrpc_cli_ctx_put);
296
297 static int import_sec_check_expire(struct obd_import *imp)
298 {
299         int adapt = 0;
300
301         spin_lock(&imp->imp_lock);
302         if (imp->imp_sec_expire &&
303             imp->imp_sec_expire < ktime_get_real_seconds()) {
304                 adapt = 1;
305                 imp->imp_sec_expire = 0;
306         }
307         spin_unlock(&imp->imp_lock);
308
309         if (!adapt)
310                 return 0;
311
312         CDEBUG(D_SEC, "found delayed sec adapt expired, do it now\n");
313         return sptlrpc_import_sec_adapt(imp, NULL, NULL);
314 }
315
316 /**
317  * Get and validate the client side ptlrpc security facilities from
318  * \a imp. There is a race condition on client reconnect when the import is
319  * being destroyed while there are outstanding client-bound requests. In
320  * this case do not output any error messages if the import security is not
321  * found.
322  *
323  * \param[in] imp obd import associated with client
324  * \param[out] sec client side ptlrpc security
325  *
326  * \retval 0 if security retrieved successfully
327  * \retval -ve errno if there was a problem
328  */
329 static int import_sec_validate_get(struct obd_import *imp,
330                                    struct ptlrpc_sec **sec)
331 {
332         int rc;
333
334         if (unlikely(imp->imp_sec_expire)) {
335                 rc = import_sec_check_expire(imp);
336                 if (rc)
337                         return rc;
338         }
339
340         *sec = sptlrpc_import_sec_ref(imp);
341         /* Only output an error when the import is still active */
342         if (!*sec) {
343                 if (list_empty(&imp->imp_zombie_chain))
344                         CERROR("import %p (%s) with no sec\n",
345                                imp, ptlrpc_import_state_name(imp->imp_state));
346                 return -EACCES;
347         }
348
349         if (unlikely((*sec)->ps_dying)) {
350                 CERROR("attempt to use dying sec %p\n", *sec);
351                 sptlrpc_sec_put(*sec);
352                 return -EACCES;
353         }
354
355         return 0;
356 }
357
358 /**
359  * Given a \a req, find or allocate an appropriate context for it.
360  * \pre req->rq_cli_ctx == NULL.
361  *
362  * \retval 0 on success, and req->rq_cli_ctx is set.
363  * \retval -ve error number, and req->rq_cli_ctx == NULL.
364  */
365 int sptlrpc_req_get_ctx(struct ptlrpc_request *req)
366 {
367         struct obd_import *imp = req->rq_import;
368         struct ptlrpc_sec *sec;
369         int             rc;
370
371         LASSERT(!req->rq_cli_ctx);
372         LASSERT(imp);
373
374         rc = import_sec_validate_get(imp, &sec);
375         if (rc)
376                 return rc;
377
378         req->rq_cli_ctx = get_my_ctx(sec);
379
380         sptlrpc_sec_put(sec);
381
382         if (!req->rq_cli_ctx) {
383                 CERROR("req %p: fail to get context\n", req);
384                 return -ECONNREFUSED;
385         }
386
387         return 0;
388 }
389
390 /**
391  * Drop the context for \a req.
392  * \pre req->rq_cli_ctx != NULL.
393  * \post req->rq_cli_ctx == NULL.
394  *
395  * If \a sync == 0, this function should return quickly without sleeping;
396  * otherwise it might trigger and wait for the whole process of sending
397  * a context-destroying rpc to the server.
398  */
399 void sptlrpc_req_put_ctx(struct ptlrpc_request *req, int sync)
400 {
401         LASSERT(req);
402         LASSERT(req->rq_cli_ctx);
403
404         /* the request might be asked to release its context early, while
405          * it is still on the context's waiting list.
406          */
407         if (!list_empty(&req->rq_ctx_chain)) {
408                 spin_lock(&req->rq_cli_ctx->cc_lock);
409                 list_del_init(&req->rq_ctx_chain);
410                 spin_unlock(&req->rq_cli_ctx->cc_lock);
411         }
412
413         sptlrpc_cli_ctx_put(req->rq_cli_ctx, sync);
414         req->rq_cli_ctx = NULL;
415 }
416
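/*
 * Switch the security context of a request from oldctx to newctx: preserve
 * the request message across the old and new security buffers and recompute
 * the request flavor for the new context.
 */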
417 static
418 int sptlrpc_req_ctx_switch(struct ptlrpc_request *req,
419                            struct ptlrpc_cli_ctx *oldctx,
420                            struct ptlrpc_cli_ctx *newctx)
421 {
422         struct sptlrpc_flavor old_flvr;
423         char *reqmsg = NULL; /* to work around old gcc */
424         int reqmsg_size;
425         int rc = 0;
426
427         LASSERT(req->rq_reqmsg);
428         LASSERT(req->rq_reqlen);
429         LASSERT(req->rq_replen);
430
431         CDEBUG(D_SEC, "req %p: switch ctx %p(%u->%s) -> %p(%u->%s), switch sec %p(%s) -> %p(%s)\n",
432                req,
433                oldctx, oldctx->cc_vcred.vc_uid, sec2target_str(oldctx->cc_sec),
434                newctx, newctx->cc_vcred.vc_uid, sec2target_str(newctx->cc_sec),
435                oldctx->cc_sec, oldctx->cc_sec->ps_policy->sp_name,
436                newctx->cc_sec, newctx->cc_sec->ps_policy->sp_name);
437
438         /* save flavor */
439         old_flvr = req->rq_flvr;
440
441         /* save request message */
442         reqmsg_size = req->rq_reqlen;
443         if (reqmsg_size != 0) {
444                 reqmsg = libcfs_kvzalloc(reqmsg_size, GFP_NOFS);
445                 if (!reqmsg)
446                         return -ENOMEM;
447                 memcpy(reqmsg, req->rq_reqmsg, reqmsg_size);
448         }
449
450         /* release old req/rep buf */
451         req->rq_cli_ctx = oldctx;
452         sptlrpc_cli_free_reqbuf(req);
453         sptlrpc_cli_free_repbuf(req);
454         req->rq_cli_ctx = newctx;
455
456         /* recalculate the flavor */
457         sptlrpc_req_set_flavor(req, 0);
458
459         /* alloc a new request buffer;
460          * we don't need to alloc the reply buffer here, leave that to the
461          * rest of the ptlrpc machinery
462          */
463         if (reqmsg_size != 0) {
464                 rc = sptlrpc_cli_alloc_reqbuf(req, reqmsg_size);
465                 if (!rc) {
466                         LASSERT(req->rq_reqmsg);
467                         memcpy(req->rq_reqmsg, reqmsg, reqmsg_size);
468                 } else {
469                         CWARN("failed to alloc reqbuf: %d\n", rc);
470                         req->rq_flvr = old_flvr;
471                 }
472
473                 kvfree(reqmsg);
474         }
475         return rc;
476 }
477
478 /**
479  * If the current context of \a req is dead somehow, e.g. we just switched
480  * flavor and thus marked the original contexts dead, we'll find a new context
481  * for it. If no switch is needed, \a req will end up with the same context.
482  *
483  * \note a request must have a context, to keep other parts of the code happy.
484  * On any failure during the switch, we must restore the old one.
485  */
486 static int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req)
487 {
488         struct ptlrpc_cli_ctx *oldctx = req->rq_cli_ctx;
489         struct ptlrpc_cli_ctx *newctx;
490         int rc;
491
492         LASSERT(oldctx);
493
494         sptlrpc_cli_ctx_get(oldctx);
495         sptlrpc_req_put_ctx(req, 0);
496
497         rc = sptlrpc_req_get_ctx(req);
498         if (unlikely(rc)) {
499                 LASSERT(!req->rq_cli_ctx);
500
501                 /* restore old ctx */
502                 req->rq_cli_ctx = oldctx;
503                 return rc;
504         }
505
506         newctx = req->rq_cli_ctx;
507         LASSERT(newctx);
508
509         if (unlikely(newctx == oldctx &&
510                      test_bit(PTLRPC_CTX_DEAD_BIT, &oldctx->cc_flags))) {
511                 /*
512                  * still got the old dead ctx, which usually means the system is too busy
513                  */
514                 CDEBUG(D_SEC,
515                        "ctx (%p, fl %lx) doesn't switch, relax a little bit\n",
516                        newctx, newctx->cc_flags);
517
518                 set_current_state(TASK_INTERRUPTIBLE);
519                 schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC));
520         } else if (unlikely(!test_bit(PTLRPC_CTX_UPTODATE_BIT, &newctx->cc_flags))) {
521                 /*
522                  * new ctx not up to date yet
523                  */
524                 CDEBUG(D_SEC,
525                        "ctx (%p, fl %lx) doesn't switch, not up to date yet\n",
526                        newctx, newctx->cc_flags);
527         } else {
528                 /*
529                  * it's possible newctx == oldctx if we're switching
530                  * subflavors within the same sec.
531                  */
532                 rc = sptlrpc_req_ctx_switch(req, oldctx, newctx);
533                 if (rc) {
534                         /* restore old ctx */
535                         sptlrpc_req_put_ctx(req, 0);
536                         req->rq_cli_ctx = oldctx;
537                         return rc;
538                 }
539
540                 LASSERT(req->rq_cli_ctx == newctx);
541         }
542
543         sptlrpc_cli_ctx_put(oldctx, 1);
544         return 0;
545 }
546
547 static
548 int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx)
549 {
550         if (cli_ctx_is_refreshed(ctx))
551                 return 1;
552         return 0;
553 }
554
555 static
556 int ctx_refresh_timeout(void *data)
557 {
558         struct ptlrpc_request *req = data;
559         int rc;
560
561         /* conn_cnt is needed in expire_one_request */
562         lustre_msg_set_conn_cnt(req->rq_reqmsg, req->rq_import->imp_conn_cnt);
563
564         rc = ptlrpc_expire_one_request(req, 1);
565         /* if we started recovery, we should mark this ctx dead; otherwise,
566          * if lgssd died, nobody would retire this ctx and subsequent connection
567          * attempts would still find the same ctx, causing a deadlock. We assume
568          * the expiry time of the request is later than the context refresh
569          * expiry time.
570          */
571         if (rc == 0)
572                 req->rq_cli_ctx->cc_ops->force_die(req->rq_cli_ctx, 0);
573         return rc;
574 }
575
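/* the wait was interrupted by a signal: mark the request interrupted */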
576 static
577 void ctx_refresh_interrupt(void *data)
578 {
579         struct ptlrpc_request *req = data;
580
581         spin_lock(&req->rq_lock);
582         req->rq_intr = 1;
583         spin_unlock(&req->rq_lock);
584 }
585
586 static
587 void req_off_ctx_list(struct ptlrpc_request *req, struct ptlrpc_cli_ctx *ctx)
588 {
589         spin_lock(&ctx->cc_lock);
590         if (!list_empty(&req->rq_ctx_chain))
591                 list_del_init(&req->rq_ctx_chain);
592         spin_unlock(&ctx->cc_lock);
593 }
594
595 /**
596  * Refresh the context of \a req if it's not up to date.
597  * \param timeout
598  * - < 0: don't wait
599  * - = 0: wait until success or a fatal error occurs
600  * - > 0: timeout value (in seconds)
601  *
602  * The status of the context may be changed by other threads at any time.
603  * We allow this race, but once we return 0, the caller will assume the
604  * context is up to date and keep using it until the owning rpc is done.
605  *
606  * \retval 0 only if the context is up to date.
607  * \retval -ve error number.
608  */
609 int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout)
610 {
611         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
612         struct ptlrpc_sec *sec;
613         struct l_wait_info lwi;
614         int rc;
615
616         LASSERT(ctx);
617
618         if (req->rq_ctx_init || req->rq_ctx_fini)
619                 return 0;
620
621         /*
622          * during this process the request's context might even change type
623          * (e.g. from a gss ctx to a null ctx), so on each loop iteration we
624          * need to re-check everything
625          */
626 again:
627         rc = import_sec_validate_get(req->rq_import, &sec);
628         if (rc)
629                 return rc;
630
631         if (sec->ps_flvr.sf_rpc != req->rq_flvr.sf_rpc) {
632                 CDEBUG(D_SEC, "req %p: flavor has changed %x -> %x\n",
633                        req, req->rq_flvr.sf_rpc, sec->ps_flvr.sf_rpc);
634                 req_off_ctx_list(req, ctx);
635                 sptlrpc_req_replace_dead_ctx(req);
636                 ctx = req->rq_cli_ctx;
637         }
638         sptlrpc_sec_put(sec);
639
640         if (cli_ctx_is_eternal(ctx))
641                 return 0;
642
643         if (unlikely(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags))) {
644                 LASSERT(ctx->cc_ops->refresh);
645                 ctx->cc_ops->refresh(ctx);
646         }
647         LASSERT(test_bit(PTLRPC_CTX_NEW_BIT, &ctx->cc_flags) == 0);
648
649         LASSERT(ctx->cc_ops->validate);
650         if (ctx->cc_ops->validate(ctx) == 0) {
651                 req_off_ctx_list(req, ctx);
652                 return 0;
653         }
654
655         if (unlikely(test_bit(PTLRPC_CTX_ERROR_BIT, &ctx->cc_flags))) {
656                 spin_lock(&req->rq_lock);
657                 req->rq_err = 1;
658                 spin_unlock(&req->rq_lock);
659                 req_off_ctx_list(req, ctx);
660                 return -EPERM;
661         }
662
663         /*
664          * There's a subtle issue when resending RPCs; suppose the following
665          * situation:
666          *  1. the request was sent to the server.
667          *  2. recovery was kicked off; after it finished, the request was
668          *     marked as resent.
669          *  3. the request is resent.
670          *  4. the old reply from the server is received; we accept and verify
671          *     it. This has to succeed, otherwise the error would be visible
672          *     to the application.
673          *  5. the new reply from the server is received and dropped by LNet.
674          *
675          * Note the xid of the old & new request is the same. We can't simply
676          * change the xid for the resent request because the server relies on
677          * it for reply reconstruction.
678          *
679          * Commonly the original context should be up to date because we
680          * have a nice expiry time; the server will keep its context because
681          * we at least hold a ref on the old context, which prevents the
682          * context-destroying RPC from being sent. So the server can still
683          * accept the request and finish the RPC. But if that's not the case:
684          *  1. If the server-side context has been trimmed, a NO_CONTEXT error
685          *     will be returned, and gss_cli_ctx_verify/unseal will switch to
686          *     the new context by force.
687          *  2. If the current context was never refreshed, we are fine: we
688          *     never actually sent a request with the old context before.
689          */
690         if (test_bit(PTLRPC_CTX_UPTODATE_BIT, &ctx->cc_flags) &&
691             unlikely(req->rq_reqmsg) &&
692             lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
693                 req_off_ctx_list(req, ctx);
694                 return 0;
695         }
696
697         if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) {
698                 req_off_ctx_list(req, ctx);
699                 /*
700                  * don't switch ctx if import was deactivated
701                  */
702                 if (req->rq_import->imp_deactive) {
703                         spin_lock(&req->rq_lock);
704                         req->rq_err = 1;
705                         spin_unlock(&req->rq_lock);
706                         return -EINTR;
707                 }
708
709                 rc = sptlrpc_req_replace_dead_ctx(req);
710                 if (rc) {
711                         LASSERT(ctx == req->rq_cli_ctx);
712                         CERROR("req %p: failed to replace dead ctx %p: %d\n",
713                                req, ctx, rc);
714                         spin_lock(&req->rq_lock);
715                         req->rq_err = 1;
716                         spin_unlock(&req->rq_lock);
717                         return rc;
718                 }
719
720                 ctx = req->rq_cli_ctx;
721                 goto again;
722         }
723
724         /*
725          * Now we're sure this context is in the middle of an upcall; add
726          * ourselves to the waiting list
727          */
728         spin_lock(&ctx->cc_lock);
729         if (list_empty(&req->rq_ctx_chain))
730                 list_add(&req->rq_ctx_chain, &ctx->cc_req_list);
731         spin_unlock(&ctx->cc_lock);
732
733         if (timeout < 0)
734                 return -EWOULDBLOCK;
735
736         /* Clear any flags that may be present from previous sends */
737         LASSERT(req->rq_receiving_reply == 0);
738         spin_lock(&req->rq_lock);
739         req->rq_err = 0;
740         req->rq_timedout = 0;
741         req->rq_resend = 0;
742         req->rq_restart = 0;
743         spin_unlock(&req->rq_lock);
744
745         lwi = LWI_TIMEOUT_INTR(msecs_to_jiffies(timeout * MSEC_PER_SEC),
746                                ctx_refresh_timeout, ctx_refresh_interrupt,
747                                req);
748         rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi);
749
750         /*
751          * the following cases could lead us here:
752          * - successfully refreshed;
753          * - interrupted;
754          * - timed out, and we don't want to recover from the failure;
755          * - timed out, and woken up when recovery finished;
756          * - someone else marked this ctx dead by force;
757          * - someone invalidated the req and called ptlrpc_client_wake_req(),
758          *   e.g. ptlrpc_abort_inflight();
759          */
760         if (!cli_ctx_is_refreshed(ctx)) {
761                 /* timed out or interrupted */
762                 req_off_ctx_list(req, ctx);
763
764                 LASSERT(rc != 0);
765                 return rc;
766         }
767
768         goto again;
769 }
770
771 /**
772  * Initialize flavor settings for \a req, according to \a opcode.
773  *
774  * \note this could be called in two situations:
775  * - a new request from ptlrpc_pre_req(), with a proper @opcode
776  * - an old request whose ctx changed in the middle, with @opcode == 0
777  */
778 void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode)
779 {
780         struct ptlrpc_sec *sec;
781
782         LASSERT(req->rq_import);
783         LASSERT(req->rq_cli_ctx);
784         LASSERT(req->rq_cli_ctx->cc_sec);
785         LASSERT(req->rq_bulk_read == 0 || req->rq_bulk_write == 0);
786
787         /* special security flags according to opcode */
788         switch (opcode) {
789         case OST_READ:
790         case MDS_READPAGE:
791         case MGS_CONFIG_READ:
792         case OBD_IDX_READ:
793                 req->rq_bulk_read = 1;
794                 break;
795         case OST_WRITE:
796         case MDS_WRITEPAGE:
797                 req->rq_bulk_write = 1;
798                 break;
799         case SEC_CTX_INIT:
800                 req->rq_ctx_init = 1;
801                 break;
802         case SEC_CTX_FINI:
803                 req->rq_ctx_fini = 1;
804                 break;
805         case 0:
806                 /* init/fini rpcs won't be resent, so they can't be here */
807                 LASSERT(req->rq_ctx_init == 0);
808                 LASSERT(req->rq_ctx_fini == 0);
809
810                 /* cleanup flags, which should be recalculated */
811                 req->rq_pack_udesc = 0;
812                 req->rq_pack_bulk = 0;
813                 break;
814         }
815
816         sec = req->rq_cli_ctx->cc_sec;
817
818         spin_lock(&sec->ps_lock);
819         req->rq_flvr = sec->ps_flvr;
820         spin_unlock(&sec->ps_lock);
821
822         /* force SVC_NULL for context initiation rpc, SVC_INTG for context
823          * destruction rpc
824          */
825         if (unlikely(req->rq_ctx_init))
826                 flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_NULL);
827         else if (unlikely(req->rq_ctx_fini))
828                 flvr_set_svc(&req->rq_flvr.sf_rpc, SPTLRPC_SVC_INTG);
829
830         /* user descriptor flag, null security can't do it anyway */
831         if ((sec->ps_flvr.sf_flags & PTLRPC_SEC_FL_UDESC) &&
832             (req->rq_flvr.sf_rpc != SPTLRPC_FLVR_NULL))
833                 req->rq_pack_udesc = 1;
834
835         /* bulk security flag */
836         if ((req->rq_bulk_read || req->rq_bulk_write) &&
837             sptlrpc_flavor_has_bulk(&req->rq_flvr))
838                 req->rq_pack_bulk = 1;
839 }
840
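/*
 * For privacy (PRIV) flavors the clear text is kept in rq_clrbuf, so once the
 * request has been sent the encrypted wire buffer (rq_reqbuf) can be freed
 * here, unless it came from a preallocated pool.
 */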
841 void sptlrpc_request_out_callback(struct ptlrpc_request *req)
842 {
843         if (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc) != SPTLRPC_SVC_PRIV)
844                 return;
845
846         LASSERT(req->rq_clrbuf);
847         if (req->rq_pool || !req->rq_reqbuf)
848                 return;
849
850         kvfree(req->rq_reqbuf);
851         req->rq_reqbuf = NULL;
852         req->rq_reqbuf_len = 0;
853 }
854
855 /**
856  * Given an import \a imp, check whether the current user has a valid context
857  * or not. We may create a new context and try to refresh it, retrying
858  * repeatedly in case of non-fatal errors. Return 0 means success.
859  */
860 int sptlrpc_import_check_ctx(struct obd_import *imp)
861 {
862         struct ptlrpc_sec *sec;
863         struct ptlrpc_cli_ctx *ctx;
864         struct ptlrpc_request *req = NULL;
865         int rc;
866
867         might_sleep();
868
869         sec = sptlrpc_import_sec_ref(imp);
870         ctx = get_my_ctx(sec);
871         sptlrpc_sec_put(sec);
872
873         if (!ctx)
874                 return -ENOMEM;
875
876         if (cli_ctx_is_eternal(ctx) ||
877             ctx->cc_ops->validate(ctx) == 0) {
878                 sptlrpc_cli_ctx_put(ctx, 1);
879                 return 0;
880         }
881
882         if (cli_ctx_is_error(ctx)) {
883                 sptlrpc_cli_ctx_put(ctx, 1);
884                 return -EACCES;
885         }
886
887         req = ptlrpc_request_cache_alloc(GFP_NOFS);
888         if (!req)
889                 return -ENOMEM;
890
891         ptlrpc_cli_req_init(req);
892         atomic_set(&req->rq_refcount, 10000);
893
894         req->rq_import = imp;
895         req->rq_flvr = sec->ps_flvr;
896         req->rq_cli_ctx = ctx;
897
898         rc = sptlrpc_req_refresh_ctx(req, 0);
899         LASSERT(list_empty(&req->rq_ctx_chain));
900         sptlrpc_cli_ctx_put(req->rq_cli_ctx, 1);
901         ptlrpc_request_cache_free(req);
902
903         return rc;
904 }
905
906 /**
907  * Used by ptlrpc client, to perform the pre-defined security transformation
908  * upon the request message of \a req. After this function is called,
909  * req->rq_reqmsg is still accessible as clear text.
910  */
911 int sptlrpc_cli_wrap_request(struct ptlrpc_request *req)
912 {
913         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
914         int rc = 0;
915
916         LASSERT(ctx);
917         LASSERT(ctx->cc_sec);
918         LASSERT(req->rq_reqbuf || req->rq_clrbuf);
919
920         /* we wrap the bulk request here because now we can be sure
921          * the context is up to date.
922          */
923         if (req->rq_bulk) {
924                 rc = sptlrpc_cli_wrap_bulk(req, req->rq_bulk);
925                 if (rc)
926                         return rc;
927         }
928
929         switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
930         case SPTLRPC_SVC_NULL:
931         case SPTLRPC_SVC_AUTH:
932         case SPTLRPC_SVC_INTG:
933                 LASSERT(ctx->cc_ops->sign);
934                 rc = ctx->cc_ops->sign(ctx, req);
935                 break;
936         case SPTLRPC_SVC_PRIV:
937                 LASSERT(ctx->cc_ops->seal);
938                 rc = ctx->cc_ops->seal(ctx, req);
939                 break;
940         default:
941                 LBUG();
942         }
943
944         if (rc == 0) {
945                 LASSERT(req->rq_reqdata_len);
946                 LASSERT(req->rq_reqdata_len % 8 == 0);
947                 LASSERT(req->rq_reqdata_len <= req->rq_reqbuf_len);
948         }
949
950         return rc;
951 }
952
953 static int do_cli_unwrap_reply(struct ptlrpc_request *req)
954 {
955         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
956         int rc;
957
958         LASSERT(ctx);
959         LASSERT(ctx->cc_sec);
960         LASSERT(req->rq_repbuf);
961         LASSERT(req->rq_repdata);
962         LASSERT(!req->rq_repmsg);
963
964         req->rq_rep_swab_mask = 0;
965
966         rc = __lustre_unpack_msg(req->rq_repdata, req->rq_repdata_len);
967         switch (rc) {
968         case 1:
969                 lustre_set_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF);
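                /* fall through */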
970         case 0:
971                 break;
972         default:
973                 CERROR("failed unpack reply: x%llu\n", req->rq_xid);
974                 return -EPROTO;
975         }
976
977         if (req->rq_repdata_len < sizeof(struct lustre_msg)) {
978                 CERROR("replied data length %d too small\n",
979                        req->rq_repdata_len);
980                 return -EPROTO;
981         }
982
983         if (SPTLRPC_FLVR_POLICY(req->rq_repdata->lm_secflvr) !=
984             SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc)) {
985                 CERROR("reply policy %u doesn't match request policy %u\n",
986                        SPTLRPC_FLVR_POLICY(req->rq_repdata->lm_secflvr),
987                        SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc));
988                 return -EPROTO;
989         }
990
991         switch (SPTLRPC_FLVR_SVC(req->rq_flvr.sf_rpc)) {
992         case SPTLRPC_SVC_NULL:
993         case SPTLRPC_SVC_AUTH:
994         case SPTLRPC_SVC_INTG:
995                 LASSERT(ctx->cc_ops->verify);
996                 rc = ctx->cc_ops->verify(ctx, req);
997                 break;
998         case SPTLRPC_SVC_PRIV:
999                 LASSERT(ctx->cc_ops->unseal);
1000                 rc = ctx->cc_ops->unseal(ctx, req);
1001                 break;
1002         default:
1003                 LBUG();
1004         }
1005         LASSERT(rc || req->rq_repmsg || req->rq_resend);
1006
1007         if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL &&
1008             !req->rq_ctx_init)
1009                 req->rq_rep_swab_mask = 0;
1010         return rc;
1011 }
1012
1013 /**
1014  * Used by ptlrpc client, to perform security transformation upon the reply
1015  * message of \a req. After return successfully, req->rq_repmsg points to
1016  * the reply message in clear text.
1017  *
1018  * \pre the reply buffer should have been un-posted from LNet, so nothing is
1019  * going to change.
1020  */
1021 int sptlrpc_cli_unwrap_reply(struct ptlrpc_request *req)
1022 {
1023         LASSERT(req->rq_repbuf);
1024         LASSERT(!req->rq_repdata);
1025         LASSERT(!req->rq_repmsg);
1026         LASSERT(req->rq_reply_off + req->rq_nob_received <= req->rq_repbuf_len);
1027
1028         if (req->rq_reply_off == 0 &&
1029             (lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT)) {
1030                 CERROR("real reply with offset 0\n");
1031                 return -EPROTO;
1032         }
1033
1034         if (req->rq_reply_off % 8 != 0) {
1035                 CERROR("reply at odd offset %u\n", req->rq_reply_off);
1036                 return -EPROTO;
1037         }
1038
1039         req->rq_repdata = (struct lustre_msg *)
1040                                 (req->rq_repbuf + req->rq_reply_off);
1041         req->rq_repdata_len = req->rq_nob_received;
1042
1043         return do_cli_unwrap_reply(req);
1044 }
1045
1046 /**
1047  * Used by ptlrpc client, to perform security transformation upon the early
1048  * reply message of \a req. We expect rq_reply_off to be 0, and
1049  * rq_nob_received to be the early reply size.
1050  *
1051  * Because the receive buffer might still be posted, the reply data might
1052  * change at any time, whether or not we're holding rq_lock. For this reason
1053  * we allocate a separate ptlrpc_request and reply buffer for early reply
1054  * processing.
1055  *
1056  * \retval 0 success, \a req_ret is filled with a duplicated ptlrpc_request.
1057  * Later the caller must call sptlrpc_cli_finish_early_reply() on the returned
1058  * \a *req_ret to release it.
1059  * \retval -ve error number, and \a req_ret will not be set.
1060  */
1061 int sptlrpc_cli_unwrap_early_reply(struct ptlrpc_request *req,
1062                                    struct ptlrpc_request **req_ret)
1063 {
1064         struct ptlrpc_request *early_req;
1065         char *early_buf;
1066         int early_bufsz, early_size;
1067         int rc;
1068
1069         early_req = ptlrpc_request_cache_alloc(GFP_NOFS);
1070         if (!early_req)
1071                 return -ENOMEM;
1072
1073         ptlrpc_cli_req_init(early_req);
1074
1075         early_size = req->rq_nob_received;
1076         early_bufsz = size_roundup_power2(early_size);
1077         early_buf = libcfs_kvzalloc(early_bufsz, GFP_NOFS);
1078         if (!early_buf) {
1079                 rc = -ENOMEM;
1080                 goto err_req;
1081         }
1082
1083         /* sanity checks and data copy-out, done inside the spinlock */
1084         spin_lock(&req->rq_lock);
1085
1086         if (req->rq_replied) {
1087                 spin_unlock(&req->rq_lock);
1088                 rc = -EALREADY;
1089                 goto err_buf;
1090         }
1091
1092         LASSERT(req->rq_repbuf);
1093         LASSERT(!req->rq_repdata);
1094         LASSERT(!req->rq_repmsg);
1095
1096         if (req->rq_reply_off != 0) {
1097                 CERROR("early reply with offset %u\n", req->rq_reply_off);
1098                 spin_unlock(&req->rq_lock);
1099                 rc = -EPROTO;
1100                 goto err_buf;
1101         }
1102
1103         if (req->rq_nob_received != early_size) {
1104                 /* even if another early reply arrived, the size should be the same */
1105                 CERROR("data size has changed from %u to %u\n",
1106                        early_size, req->rq_nob_received);
1107                 spin_unlock(&req->rq_lock);
1108                 rc = -EINVAL;
1109                 goto err_buf;
1110         }
1111
1112         if (req->rq_nob_received < sizeof(struct lustre_msg)) {
1113                 CERROR("early reply length %d too small\n",
1114                        req->rq_nob_received);
1115                 spin_unlock(&req->rq_lock);
1116                 rc = -EALREADY;
1117                 goto err_buf;
1118         }
1119
1120         memcpy(early_buf, req->rq_repbuf, early_size);
1121         spin_unlock(&req->rq_lock);
1122
1123         early_req->rq_cli_ctx = sptlrpc_cli_ctx_get(req->rq_cli_ctx);
1124         early_req->rq_flvr = req->rq_flvr;
1125         early_req->rq_repbuf = early_buf;
1126         early_req->rq_repbuf_len = early_bufsz;
1127         early_req->rq_repdata = (struct lustre_msg *)early_buf;
1128         early_req->rq_repdata_len = early_size;
1129         early_req->rq_early = 1;
1130         early_req->rq_reqmsg = req->rq_reqmsg;
1131
1132         rc = do_cli_unwrap_reply(early_req);
1133         if (rc) {
1134                 DEBUG_REQ(D_ADAPTTO, early_req,
1135                           "error %d unwrap early reply", rc);
1136                 goto err_ctx;
1137         }
1138
1139         LASSERT(early_req->rq_repmsg);
1140         *req_ret = early_req;
1141         return 0;
1142
1143 err_ctx:
1144         sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
1145 err_buf:
1146         kvfree(early_buf);
1147 err_req:
1148         ptlrpc_request_cache_free(early_req);
1149         return rc;
1150 }
1151
1152 /**
1153  * Used by ptlrpc client, to release a processed early reply \a early_req.
1154  *
1155  * \pre \a early_req was obtained from calling sptlrpc_cli_unwrap_early_reply().
1156  */
1157 void sptlrpc_cli_finish_early_reply(struct ptlrpc_request *early_req)
1158 {
1159         LASSERT(early_req->rq_repbuf);
1160         LASSERT(early_req->rq_repdata);
1161         LASSERT(early_req->rq_repmsg);
1162
1163         sptlrpc_cli_ctx_put(early_req->rq_cli_ctx, 1);
1164         kvfree(early_req->rq_repbuf);
1165         ptlrpc_request_cache_free(early_req);
1166 }
1167
1168 /**************************************************
1169  * sec ID
1170  **************************************************/
1171
1172 /*
1173  * "fixed" sec (e.g. null) use sec_id < 0
1174  */
1175 static atomic_t sptlrpc_sec_id = ATOMIC_INIT(1);
1176
1177 int sptlrpc_get_next_secid(void)
1178 {
1179         return atomic_inc_return(&sptlrpc_sec_id);
1180 }
1181 EXPORT_SYMBOL(sptlrpc_get_next_secid);
1182
1183 /**************************************************
1184  * client side high-level security APIs
1185  **************************************************/
1186
1187 static int sec_cop_flush_ctx_cache(struct ptlrpc_sec *sec, uid_t uid,
1188                                    int grace, int force)
1189 {
1190         struct ptlrpc_sec_policy *policy = sec->ps_policy;
1191
1192         LASSERT(policy->sp_cops);
1193         LASSERT(policy->sp_cops->flush_ctx_cache);
1194
1195         return policy->sp_cops->flush_ctx_cache(sec, uid, grace, force);
1196 }
1197
1198 static void sec_cop_destroy_sec(struct ptlrpc_sec *sec)
1199 {
1200         struct ptlrpc_sec_policy *policy = sec->ps_policy;
1201
1202         LASSERT_ATOMIC_ZERO(&sec->ps_refcount);
1203         LASSERT_ATOMIC_ZERO(&sec->ps_nctx);
1204         LASSERT(policy->sp_cops->destroy_sec);
1205
1206         CDEBUG(D_SEC, "%s@%p: being destroyed\n", sec->ps_policy->sp_name, sec);
1207
1208         policy->sp_cops->destroy_sec(sec);
1209         sptlrpc_policy_put(policy);
1210 }
1211
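/*
 * If the policy implements kill_sec, notify it that this sec is being torn
 * down and flush all of its cached contexts.
 */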
1212 static void sptlrpc_sec_kill(struct ptlrpc_sec *sec)
1213 {
1214         LASSERT_ATOMIC_POS(&sec->ps_refcount);
1215
1216         if (sec->ps_policy->sp_cops->kill_sec) {
1217                 sec->ps_policy->sp_cops->kill_sec(sec);
1218
1219                 sec_cop_flush_ctx_cache(sec, -1, 1, 1);
1220         }
1221 }
1222
1223 static struct ptlrpc_sec *sptlrpc_sec_get(struct ptlrpc_sec *sec)
1224 {
1225         if (sec)
1226                 atomic_inc(&sec->ps_refcount);
1227
1228         return sec;
1229 }
1230
1231 void sptlrpc_sec_put(struct ptlrpc_sec *sec)
1232 {
1233         if (sec) {
1234                 LASSERT_ATOMIC_POS(&sec->ps_refcount);
1235
1236                 if (atomic_dec_and_test(&sec->ps_refcount)) {
1237                         sptlrpc_gc_del_sec(sec);
1238                         sec_cop_destroy_sec(sec);
1239                 }
1240         }
1241 }
1242 EXPORT_SYMBOL(sptlrpc_sec_put);
1243
1244 /*
1245  * the policy module is responsible for taking a reference on the import
1246  */
1247 static
1248 struct ptlrpc_sec *sptlrpc_sec_create(struct obd_import *imp,
1249                                       struct ptlrpc_svc_ctx *svc_ctx,
1250                                       struct sptlrpc_flavor *sf,
1251                                       enum lustre_sec_part sp)
1252 {
1253         struct ptlrpc_sec_policy *policy;
1254         struct ptlrpc_sec *sec;
1255         char str[32];
1256
1257         if (svc_ctx) {
1258                 LASSERT(imp->imp_dlm_fake == 1);
1259
1260                 CDEBUG(D_SEC, "%s %s: reverse sec using flavor %s\n",
1261                        imp->imp_obd->obd_type->typ_name,
1262                        imp->imp_obd->obd_name,
1263                        sptlrpc_flavor2name(sf, str, sizeof(str)));
1264
1265                 policy = sptlrpc_policy_get(svc_ctx->sc_policy);
1266                 sf->sf_flags |= PTLRPC_SEC_FL_REVERSE | PTLRPC_SEC_FL_ROOTONLY;
1267         } else {
1268                 LASSERT(imp->imp_dlm_fake == 0);
1269
1270                 CDEBUG(D_SEC, "%s %s: select security flavor %s\n",
1271                        imp->imp_obd->obd_type->typ_name,
1272                        imp->imp_obd->obd_name,
1273                        sptlrpc_flavor2name(sf, str, sizeof(str)));
1274
1275                 policy = sptlrpc_wireflavor2policy(sf->sf_rpc);
1276                 if (!policy) {
1277                         CERROR("invalid flavor 0x%x\n", sf->sf_rpc);
1278                         return NULL;
1279                 }
1280         }
1281
1282         sec = policy->sp_cops->create_sec(imp, svc_ctx, sf);
1283         if (sec) {
1284                 atomic_inc(&sec->ps_refcount);
1285
1286                 sec->ps_part = sp;
1287
1288                 if (sec->ps_gc_interval && policy->sp_cops->gc_ctx)
1289                         sptlrpc_gc_add_sec(sec);
1290         } else {
1291                 sptlrpc_policy_put(policy);
1292         }
1293
1294         return sec;
1295 }
1296
1297 struct ptlrpc_sec *sptlrpc_import_sec_ref(struct obd_import *imp)
1298 {
1299         struct ptlrpc_sec *sec;
1300
1301         spin_lock(&imp->imp_lock);
1302         sec = sptlrpc_sec_get(imp->imp_sec);
1303         spin_unlock(&imp->imp_lock);
1304
1305         return sec;
1306 }
1307 EXPORT_SYMBOL(sptlrpc_import_sec_ref);
1308
1309 static void sptlrpc_import_sec_install(struct obd_import *imp,
1310                                        struct ptlrpc_sec *sec)
1311 {
1312         struct ptlrpc_sec *old_sec;
1313
1314         LASSERT_ATOMIC_POS(&sec->ps_refcount);
1315
1316         spin_lock(&imp->imp_lock);
1317         old_sec = imp->imp_sec;
1318         imp->imp_sec = sec;
1319         spin_unlock(&imp->imp_lock);
1320
1321         if (old_sec) {
1322                 sptlrpc_sec_kill(old_sec);
1323
1324                 /* balance the ref taken by this import */
1325                 sptlrpc_sec_put(old_sec);
1326         }
1327 }
1328
1329 static inline
1330 int flavor_equal(struct sptlrpc_flavor *sf1, struct sptlrpc_flavor *sf2)
1331 {
1332         return (memcmp(sf1, sf2, sizeof(*sf1)) == 0);
1333 }
1334
1335 static inline
1336 void flavor_copy(struct sptlrpc_flavor *dst, struct sptlrpc_flavor *src)
1337 {
1338         *dst = *src;
1339 }
1340
1341 static void sptlrpc_import_sec_adapt_inplace(struct obd_import *imp,
1342                                              struct ptlrpc_sec *sec,
1343                                              struct sptlrpc_flavor *sf)
1344 {
1345         char str1[32], str2[32];
1346
1347         if (sec->ps_flvr.sf_flags != sf->sf_flags)
1348                 CDEBUG(D_SEC, "changing sec flags: %s -> %s\n",
1349                        sptlrpc_secflags2str(sec->ps_flvr.sf_flags,
1350                                             str1, sizeof(str1)),
1351                        sptlrpc_secflags2str(sf->sf_flags,
1352                                             str2, sizeof(str2)));
1353
1354         spin_lock(&sec->ps_lock);
1355         flavor_copy(&sec->ps_flvr, sf);
1356         spin_unlock(&sec->ps_lock);
1357 }
1358
1359 /**
1360  * Get an appropriate ptlrpc_sec for \a imp, according to the current
1361  * configuration. When called, imp->imp_sec may or may not be NULL.
1362  *
1363  *  - regular import: \a svc_ctx should be NULL and \a flvr is ignored;
1364  *  - reverse import: \a svc_ctx and \a flvr are obtained from incoming request.
1365  */
1366 int sptlrpc_import_sec_adapt(struct obd_import *imp,
1367                              struct ptlrpc_svc_ctx *svc_ctx,
1368                              struct sptlrpc_flavor *flvr)
1369 {
1370         struct ptlrpc_connection *conn;
1371         struct sptlrpc_flavor sf;
1372         struct ptlrpc_sec *sec, *newsec;
1373         enum lustre_sec_part sp;
1374         char str[24];
1375         int rc = 0;
1376
1377         might_sleep();
1378
1379         if (!imp)
1380                 return 0;
1381
1382         conn = imp->imp_connection;
1383
1384         if (!svc_ctx) {
1385                 struct client_obd *cliobd = &imp->imp_obd->u.cli;
1386                 /*
1387                  * normal import: determine the flavor from the rule set,
1388                  * except for the mgc, whose flavor is predetermined.
1389                  */
1390                 if (cliobd->cl_sp_me == LUSTRE_SP_MGC)
1391                         sf = cliobd->cl_flvr_mgc;
1392                 else
1393                         sptlrpc_conf_choose_flavor(cliobd->cl_sp_me,
1394                                                    cliobd->cl_sp_to,
1395                                                    &cliobd->cl_target_uuid,
1396                                                    conn->c_self, &sf);
1397
1398                 sp = imp->imp_obd->u.cli.cl_sp_me;
1399         } else {
1400                 /* reverse import, determine flavor from incoming request */
1401                 sf = *flvr;
1402
1403                 if (sf.sf_rpc != SPTLRPC_FLVR_NULL)
1404                         sf.sf_flags = PTLRPC_SEC_FL_REVERSE |
1405                                       PTLRPC_SEC_FL_ROOTONLY;
1406
1407                 sp = sptlrpc_target_sec_part(imp->imp_obd);
1408         }
1409
1410         sec = sptlrpc_import_sec_ref(imp);
1411         if (sec) {
1412                 char str2[24];
1413
1414                 if (flavor_equal(&sf, &sec->ps_flvr))
1415                         goto out;
1416
1417                 CDEBUG(D_SEC, "import %s->%s: changing flavor %s -> %s\n",
1418                        imp->imp_obd->obd_name,
1419                        obd_uuid2str(&conn->c_remote_uuid),
1420                        sptlrpc_flavor2name(&sec->ps_flvr, str, sizeof(str)),
1421                        sptlrpc_flavor2name(&sf, str2, sizeof(str2)));
1422
1423                 if (SPTLRPC_FLVR_POLICY(sf.sf_rpc) ==
1424                     SPTLRPC_FLVR_POLICY(sec->ps_flvr.sf_rpc) &&
1425                     SPTLRPC_FLVR_MECH(sf.sf_rpc) ==
1426                     SPTLRPC_FLVR_MECH(sec->ps_flvr.sf_rpc)) {
1427                         sptlrpc_import_sec_adapt_inplace(imp, sec, &sf);
1428                         goto out;
1429                 }
1430         } else if (SPTLRPC_FLVR_BASE(sf.sf_rpc) !=
1431                    SPTLRPC_FLVR_BASE(SPTLRPC_FLVR_NULL)) {
1432                 CDEBUG(D_SEC, "import %s->%s netid %x: select flavor %s\n",
1433                        imp->imp_obd->obd_name,
1434                        obd_uuid2str(&conn->c_remote_uuid),
1435                        LNET_NIDNET(conn->c_self),
1436                        sptlrpc_flavor2name(&sf, str, sizeof(str)));
1437         }
1438
1439         mutex_lock(&imp->imp_sec_mutex);
1440
1441         newsec = sptlrpc_sec_create(imp, svc_ctx, &sf, sp);
1442         if (newsec) {
1443                 sptlrpc_import_sec_install(imp, newsec);
1444         } else {
1445                 CERROR("import %s->%s: failed to create new sec\n",
1446                        imp->imp_obd->obd_name,
1447                        obd_uuid2str(&conn->c_remote_uuid));
1448                 rc = -EPERM;
1449         }
1450
1451         mutex_unlock(&imp->imp_sec_mutex);
1452 out:
1453         sptlrpc_sec_put(sec);
1454         return rc;
1455 }
1456
1457 void sptlrpc_import_sec_put(struct obd_import *imp)
1458 {
1459         if (imp->imp_sec) {
1460                 sptlrpc_sec_kill(imp->imp_sec);
1461
1462                 sptlrpc_sec_put(imp->imp_sec);
1463                 imp->imp_sec = NULL;
1464         }
1465 }
1466
1467 static void import_flush_ctx_common(struct obd_import *imp,
1468                                     uid_t uid, int grace, int force)
1469 {
1470         struct ptlrpc_sec *sec;
1471
1472         if (!imp)
1473                 return;
1474
1475         sec = sptlrpc_import_sec_ref(imp);
1476         if (!sec)
1477                 return;
1478
1479         sec_cop_flush_ctx_cache(sec, uid, grace, force);
1480         sptlrpc_sec_put(sec);
1481 }
1482
1483 void sptlrpc_import_flush_my_ctx(struct obd_import *imp)
1484 {
1485         import_flush_ctx_common(imp, from_kuid(&init_user_ns, current_uid()),
1486                                 1, 1);
1487 }
1488 EXPORT_SYMBOL(sptlrpc_import_flush_my_ctx);
1489
1490 void sptlrpc_import_flush_all_ctx(struct obd_import *imp)
1491 {
1492         import_flush_ctx_common(imp, -1, 1, 1);
1493 }
1494 EXPORT_SYMBOL(sptlrpc_import_flush_all_ctx);
1495
1496 /**
1497  * Used by ptlrpc client to allocate request buffer of \a req. Upon return
1498  * successfully, req->rq_reqmsg points to a buffer with size \a msgsize.
1499  */
1500 int sptlrpc_cli_alloc_reqbuf(struct ptlrpc_request *req, int msgsize)
1501 {
1502         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1503         struct ptlrpc_sec_policy *policy;
1504         int rc;
1505
1506         LASSERT(ctx);
1507         LASSERT(ctx->cc_sec);
1508         LASSERT(ctx->cc_sec->ps_policy);
1509         LASSERT(!req->rq_reqmsg);
1510         LASSERT_ATOMIC_POS(&ctx->cc_refcount);
1511
1512         policy = ctx->cc_sec->ps_policy;
1513         rc = policy->sp_cops->alloc_reqbuf(ctx->cc_sec, req, msgsize);
1514         if (!rc) {
1515                 LASSERT(req->rq_reqmsg);
1516                 LASSERT(req->rq_reqbuf || req->rq_clrbuf);
1517
1518                 /* zeroing preallocated buffer */
1519                 if (req->rq_pool)
1520                         memset(req->rq_reqmsg, 0, msgsize);
1521         }
1522
1523         return rc;
1524 }
1525
1526 /**
1527  * Used by ptlrpc client to free request buffer of \a req. After this
1528  * req->rq_reqmsg is set to NULL and should not be accessed anymore.
1529  */
1530 void sptlrpc_cli_free_reqbuf(struct ptlrpc_request *req)
1531 {
1532         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1533         struct ptlrpc_sec_policy *policy;
1534
1535         LASSERT(ctx);
1536         LASSERT(ctx->cc_sec);
1537         LASSERT(ctx->cc_sec->ps_policy);
1538         LASSERT_ATOMIC_POS(&ctx->cc_refcount);
1539
1540         if (!req->rq_reqbuf && !req->rq_clrbuf)
1541                 return;
1542
1543         policy = ctx->cc_sec->ps_policy;
1544         policy->sp_cops->free_reqbuf(ctx->cc_sec, req);
1545         req->rq_reqmsg = NULL;
1546 }
1547
1548 /*
1549  * NOTE caller must guarantee the buffer size is enough for the enlargement
1550  */
1551 void _sptlrpc_enlarge_msg_inplace(struct lustre_msg *msg,
1552                                   int segment, int newsize)
1553 {
1554         void *src, *dst;
1555         int oldsize, oldmsg_size, movesize;
1556
1557         LASSERT(segment < msg->lm_bufcount);
1558         LASSERT(msg->lm_buflens[segment] <= newsize);
1559
1560         if (msg->lm_buflens[segment] == newsize)
1561                 return;
1562
1563         /* no data needs to move if we are enlarging the last segment */
1564         if (segment == msg->lm_bufcount - 1) {
1565                 msg->lm_buflens[segment] = newsize;
1566                 return;
1567         }
1568
1569         oldsize = msg->lm_buflens[segment];
1570
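        /*
         * compute src with the old segment size and dst with the new size
         * temporarily installed, so lustre_msg_buf() returns the current and
         * the shifted start of the following segment.
         */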
1571         src = lustre_msg_buf(msg, segment + 1, 0);
1572         msg->lm_buflens[segment] = newsize;
1573         dst = lustre_msg_buf(msg, segment + 1, 0);
1574         msg->lm_buflens[segment] = oldsize;
1575
1576         /* move the data from segment + 1 through the last segment */
1577         LASSERT(msg->lm_magic == LUSTRE_MSG_MAGIC_V2);
1578         oldmsg_size = lustre_msg_size_v2(msg->lm_bufcount, msg->lm_buflens);
1579         movesize = oldmsg_size - ((unsigned long)src - (unsigned long)msg);
1580         LASSERT(movesize >= 0);
1581
1582         if (movesize)
1583                 memmove(dst, src, movesize);
1584
1585                 /* note we don't clear the areas where the old data lived; they are not secret */
1586
1587         /* finally set new segment size */
1588         msg->lm_buflens[segment] = newsize;
1589 }
1590 EXPORT_SYMBOL(_sptlrpc_enlarge_msg_inplace);
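/*
 * Editor's note: a small worked example, not part of the original code, of
 * the in-place enlargement above.  Assume a request message with
 * lm_bufcount == 3 and lm_buflens == { 128, 64, 256 }, and that segment 1
 * is enlarged to 96 bytes:
 *
 *	_sptlrpc_enlarge_msg_inplace(msg, 1, 96);
 *
 * The data of segment 2 is memmove()d towards the end of the buffer to make
 * room, and lm_buflens becomes { 128, 96, 256 }.  The caller must have
 * allocated the underlying buffer large enough to absorb the extra bytes,
 * as the NOTE above requires.
 */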
1591
1592 /**
1593  * Used by ptlrpc client to enlarge segment \a segment of the request message
1594  * pointed to by req->rq_reqmsg to size \a newsize. All previously filled-in
1595  * data is preserved across the enlargement. This must be called after the
1596  * original request buffer has been allocated.
1597  *
1598  * \note After this is called, rq_reqmsg and rq_reqlen might have changed,
1599  * so the caller should refresh any local pointers if needed.
1600  */
1601 int sptlrpc_cli_enlarge_reqbuf(struct ptlrpc_request *req,
1602                                int segment, int newsize)
1603 {
1604         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1605         struct ptlrpc_sec_cops *cops;
1606         struct lustre_msg *msg = req->rq_reqmsg;
1607
1608         LASSERT(ctx);
1609         LASSERT(msg);
1610         LASSERT(msg->lm_bufcount > segment);
1611         LASSERT(msg->lm_buflens[segment] <= newsize);
1612
1613         if (msg->lm_buflens[segment] == newsize)
1614                 return 0;
1615
1616         cops = ctx->cc_sec->ps_policy->sp_cops;
1617         LASSERT(cops->enlarge_reqbuf);
1618         return cops->enlarge_reqbuf(ctx->cc_sec, req, segment, newsize);
1619 }
1620 EXPORT_SYMBOL(sptlrpc_cli_enlarge_reqbuf);
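/*
 * Editor's note: an illustrative sketch, not part of the original code, of
 * the caveat in the comment above - after enlarging a segment, any cached
 * pointer into the request message must be re-fetched:
 *
 *	rc = sptlrpc_cli_enlarge_reqbuf(req, segment, newsize);
 *	if (rc)
 *		return rc;
 *	msg = req->rq_reqmsg;	(may have been reallocated or moved)
 *	body = lustre_msg_buf(msg, segment, 0);
 */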
1621
1622 /**
1623  * Used by ptlrpc client to allocate the reply buffer of \a req.
1624  *
1625  * \note After this, req->rq_repmsg is still not accessible.
1626  */
1627 int sptlrpc_cli_alloc_repbuf(struct ptlrpc_request *req, int msgsize)
1628 {
1629         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1630         struct ptlrpc_sec_policy *policy;
1631
1632         LASSERT(ctx);
1633         LASSERT(ctx->cc_sec);
1634         LASSERT(ctx->cc_sec->ps_policy);
1635
1636         if (req->rq_repbuf)
1637                 return 0;
1638
1639         policy = ctx->cc_sec->ps_policy;
1640         return policy->sp_cops->alloc_repbuf(ctx->cc_sec, req, msgsize);
1641 }
1642
1643 /**
1644  * Used by ptlrpc client to free the reply buffer of \a req. After this,
1645  * req->rq_repmsg is set to NULL and must not be accessed anymore.
1646  */
1647 void sptlrpc_cli_free_repbuf(struct ptlrpc_request *req)
1648 {
1649         struct ptlrpc_cli_ctx *ctx = req->rq_cli_ctx;
1650         struct ptlrpc_sec_policy *policy;
1651
1652         LASSERT(ctx);
1653         LASSERT(ctx->cc_sec);
1654         LASSERT(ctx->cc_sec->ps_policy);
1655         LASSERT_ATOMIC_POS(&ctx->cc_refcount);
1656
1657         if (!req->rq_repbuf)
1658                 return;
1659         LASSERT(req->rq_repbuf_len);
1660
1661         policy = ctx->cc_sec->ps_policy;
1662         policy->sp_cops->free_repbuf(ctx->cc_sec, req);
1663         req->rq_repmsg = NULL;
1664 }
1665
1666 static int sptlrpc_svc_install_rvs_ctx(struct obd_import *imp,
1667                                        struct ptlrpc_svc_ctx *ctx)
1668 {
1669         struct ptlrpc_sec_policy *policy = ctx->sc_policy;
1670
1671         if (!policy->sp_sops->install_rctx)
1672                 return 0;
1673         return policy->sp_sops->install_rctx(imp, ctx);
1674 }
1675
1676 /****************************************
1677  * server side security          *
1678  ****************************************/
1679
1680 static int flavor_allowed(struct sptlrpc_flavor *exp,
1681                           struct ptlrpc_request *req)
1682 {
1683         struct sptlrpc_flavor *flvr = &req->rq_flvr;
1684
1685         if (exp->sf_rpc == SPTLRPC_FLVR_ANY || exp->sf_rpc == flvr->sf_rpc)
1686                 return 1;
1687
1688         if ((req->rq_ctx_init || req->rq_ctx_fini) &&
1689             SPTLRPC_FLVR_POLICY(exp->sf_rpc) ==
1690             SPTLRPC_FLVR_POLICY(flvr->sf_rpc) &&
1691             SPTLRPC_FLVR_MECH(exp->sf_rpc) == SPTLRPC_FLVR_MECH(flvr->sf_rpc))
1692                 return 1;
1693
1694         return 0;
1695 }
1696
1697 #define EXP_FLVR_UPDATE_EXPIRE      (OBD_TIMEOUT_DEFAULT + 10)
1698
1699 /**
1700  * Given an export \a exp, check whether the flavor of the incoming \a req
1701  * is allowed by the export \a exp. The main logic deals with handling
1702  * configuration changes. Returns 0 on success.
1703  */
1704 int sptlrpc_target_export_check(struct obd_export *exp,
1705                                 struct ptlrpc_request *req)
1706 {
1707         struct sptlrpc_flavor flavor;
1708
1709         if (!exp)
1710                 return 0;
1711
1712         /* client side export has no imp_reverse, skip
1713          * FIXME maybe we should check this flavor as well???
1714          */
1715         if (!exp->exp_imp_reverse)
1716                 return 0;
1717
1718         /* don't care about ctx fini rpc */
1719         if (req->rq_ctx_fini)
1720                 return 0;
1721
1722         spin_lock(&exp->exp_lock);
1723
1724         /* if the flavor just changed (exp->exp_flvr_changed != 0), we wait for
1725          * the first req with the new flavor, then treat it as the current flavor
1726          * and adapt the reverse sec accordingly.
1727          * note the first rpc with the new flavor might not carry a root ctx, in
1728          * which case the sec_adapt is delayed by leaving exp_flvr_adapt == 1.
1729          */
1730         if (unlikely(exp->exp_flvr_changed) &&
1731             flavor_allowed(&exp->exp_flvr_old[1], req)) {
1732                 /* make the new flavor the "current" one, and the old ones
1733                  * about-to-expire
1734                  */
1735                 CDEBUG(D_SEC, "exp %p: just changed: %x->%x\n", exp,
1736                        exp->exp_flvr.sf_rpc, exp->exp_flvr_old[1].sf_rpc);
1737                 flavor = exp->exp_flvr_old[1];
1738                 exp->exp_flvr_old[1] = exp->exp_flvr_old[0];
1739                 exp->exp_flvr_expire[1] = exp->exp_flvr_expire[0];
1740                 exp->exp_flvr_old[0] = exp->exp_flvr;
1741                 exp->exp_flvr_expire[0] = ktime_get_real_seconds() +
1742                                           EXP_FLVR_UPDATE_EXPIRE;
1743                 exp->exp_flvr = flavor;
1744
1745                 /* flavor change finished */
1746                 exp->exp_flvr_changed = 0;
1747                 LASSERT(exp->exp_flvr_adapt == 1);
1748
1749                 /* if it's gss, we are only interested in root ctx init */
1750                 if (req->rq_auth_gss &&
1751                     !(req->rq_ctx_init &&
1752                       (req->rq_auth_usr_root || req->rq_auth_usr_mdt ||
1753                        req->rq_auth_usr_ost))) {
1754                         spin_unlock(&exp->exp_lock);
1755                         CDEBUG(D_SEC, "is good but not root(%d:%d:%d:%d:%d)\n",
1756                                req->rq_auth_gss, req->rq_ctx_init,
1757                                req->rq_auth_usr_root, req->rq_auth_usr_mdt,
1758                                req->rq_auth_usr_ost);
1759                         return 0;
1760                 }
1761
1762                 exp->exp_flvr_adapt = 0;
1763                 spin_unlock(&exp->exp_lock);
1764
1765                 return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
1766                                                 req->rq_svc_ctx, &flavor);
1767         }
1768
1769         /* if it equals the current flavor, we accept it, but still need to
1770          * deal with the reverse sec/ctx
1771          */
1772         if (likely(flavor_allowed(&exp->exp_flvr, req))) {
1773                 /* most cases should return here; we are only interested in
1774                  * gss root ctx init
1775                  */
1776                 if (!req->rq_auth_gss || !req->rq_ctx_init ||
1777                     (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt &&
1778                      !req->rq_auth_usr_ost)) {
1779                         spin_unlock(&exp->exp_lock);
1780                         return 0;
1781                 }
1782
1783                 /* if the flavor just changed, we should not proceed; just
1784                  * leave it, the current flavor will be discovered and replaced
1785                  * shortly, and let _this_ rpc pass through
1786                  */
1787                 if (exp->exp_flvr_changed) {
1788                         LASSERT(exp->exp_flvr_adapt);
1789                         spin_unlock(&exp->exp_lock);
1790                         return 0;
1791                 }
1792
1793                 if (exp->exp_flvr_adapt) {
1794                         exp->exp_flvr_adapt = 0;
1795                         CDEBUG(D_SEC, "exp %p (%x|%x|%x): do delayed adapt\n",
1796                                exp, exp->exp_flvr.sf_rpc,
1797                                exp->exp_flvr_old[0].sf_rpc,
1798                                exp->exp_flvr_old[1].sf_rpc);
1799                         flavor = exp->exp_flvr;
1800                         spin_unlock(&exp->exp_lock);
1801
1802                         return sptlrpc_import_sec_adapt(exp->exp_imp_reverse,
1803                                                         req->rq_svc_ctx,
1804                                                         &flavor);
1805                 } else {
1806                         CDEBUG(D_SEC, "exp %p (%x|%x|%x): is current flavor, install rvs ctx\n",
1807                                exp, exp->exp_flvr.sf_rpc,
1808                                exp->exp_flvr_old[0].sf_rpc,
1809                                exp->exp_flvr_old[1].sf_rpc);
1810                         spin_unlock(&exp->exp_lock);
1811
1812                         return sptlrpc_svc_install_rvs_ctx(exp->exp_imp_reverse,
1813                                                            req->rq_svc_ctx);
1814                 }
1815         }
1816
1817         if (exp->exp_flvr_expire[0]) {
1818                 if (exp->exp_flvr_expire[0] >= ktime_get_real_seconds()) {
1819                         if (flavor_allowed(&exp->exp_flvr_old[0], req)) {
1820                                 CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the middle one (%lld)\n", exp,
1821                                        exp->exp_flvr.sf_rpc,
1822                                        exp->exp_flvr_old[0].sf_rpc,
1823                                        exp->exp_flvr_old[1].sf_rpc,
1824                                        (s64)(exp->exp_flvr_expire[0] -
1825                                        ktime_get_real_seconds()));
1826                                 spin_unlock(&exp->exp_lock);
1827                                 return 0;
1828                         }
1829                 } else {
1830                         CDEBUG(D_SEC, "mark middle expired\n");
1831                         exp->exp_flvr_expire[0] = 0;
1832                 }
1833                 CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x not match middle\n", exp,
1834                        exp->exp_flvr.sf_rpc,
1835                        exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc,
1836                        req->rq_flvr.sf_rpc);
1837         }
1838
1839         /* now that it doesn't match the current flavor, the only chance to
1840          * accept it is matching one of the old flavors which has not expired.
1841          */
1842         if (exp->exp_flvr_changed == 0 && exp->exp_flvr_expire[1]) {
1843                 if (exp->exp_flvr_expire[1] >= ktime_get_real_seconds()) {
1844                         if (flavor_allowed(&exp->exp_flvr_old[1], req)) {
1845                                 CDEBUG(D_SEC, "exp %p (%x|%x|%x): match the oldest one (%lld)\n",
1846                                        exp,
1847                                        exp->exp_flvr.sf_rpc,
1848                                        exp->exp_flvr_old[0].sf_rpc,
1849                                        exp->exp_flvr_old[1].sf_rpc,
1850                                        (s64)(exp->exp_flvr_expire[1] -
1851                                        ktime_get_real_seconds()));
1852                                 spin_unlock(&exp->exp_lock);
1853                                 return 0;
1854                         }
1855                 } else {
1856                         CDEBUG(D_SEC, "mark oldest expired\n");
1857                         exp->exp_flvr_expire[1] = 0;
1858                 }
1859                 CDEBUG(D_SEC, "exp %p (%x|%x|%x): %x no match found\n",
1860                        exp, exp->exp_flvr.sf_rpc,
1861                        exp->exp_flvr_old[0].sf_rpc, exp->exp_flvr_old[1].sf_rpc,
1862                        req->rq_flvr.sf_rpc);
1863         } else {
1864                 CDEBUG(D_SEC, "exp %p (%x|%x|%x): skip the last one\n",
1865                        exp, exp->exp_flvr.sf_rpc, exp->exp_flvr_old[0].sf_rpc,
1866                        exp->exp_flvr_old[1].sf_rpc);
1867         }
1868
1869         spin_unlock(&exp->exp_lock);
1870
1871         CWARN("exp %p(%s): req %p (%u|%u|%u|%u|%u|%u) with unauthorized flavor %x, expect %x|%x(%+lld)|%x(%+lld)\n",
1872               exp, exp->exp_obd->obd_name,
1873               req, req->rq_auth_gss, req->rq_ctx_init, req->rq_ctx_fini,
1874               req->rq_auth_usr_root, req->rq_auth_usr_mdt, req->rq_auth_usr_ost,
1875               req->rq_flvr.sf_rpc,
1876               exp->exp_flvr.sf_rpc,
1877               exp->exp_flvr_old[0].sf_rpc,
1878               exp->exp_flvr_expire[0] ?
1879               (s64)(exp->exp_flvr_expire[0] - ktime_get_real_seconds()) : 0,
1880               exp->exp_flvr_old[1].sf_rpc,
1881               exp->exp_flvr_expire[1] ?
1882               (s64)(exp->exp_flvr_expire[1] - ktime_get_real_seconds()) : 0);
1883         return -EACCES;
1884 }
1885 EXPORT_SYMBOL(sptlrpc_target_export_check);
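/*
 * Editor's note: an illustrative summary, not part of the original code, of
 * the flavor bookkeeping used by sptlrpc_target_export_check() above.  Each
 * export keeps a small history of flavors:
 *
 *	exp_flvr	- the current flavor
 *	exp_flvr_old[0]	- the previous ("middle") flavor, valid until
 *			  exp_flvr_expire[0]
 *	exp_flvr_old[1]	- the oldest flavor, valid until exp_flvr_expire[1]
 *
 * When a configuration change is pending (exp_flvr_changed != 0), the first
 * request arriving with the new flavor promotes it to exp_flvr and shifts the
 * older entries down, each being given an EXP_FLVR_UPDATE_EXPIRE grace period
 * during which requests still using it are accepted.
 */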
1886
1887 static int sptlrpc_svc_check_from(struct ptlrpc_request *req, int svc_rc)
1888 {
1889         /* peer's claim is unreliable unless gss is being used */
1890         if (!req->rq_auth_gss || svc_rc == SECSVC_DROP)
1891                 return svc_rc;
1892
1893         switch (req->rq_sp_from) {
1894         case LUSTRE_SP_CLI:
1895                 if (req->rq_auth_usr_mdt || req->rq_auth_usr_ost) {
1896                         DEBUG_REQ(D_ERROR, req, "faked source CLI");
1897                         svc_rc = SECSVC_DROP;
1898                 }
1899                 break;
1900         case LUSTRE_SP_MDT:
1901                 if (!req->rq_auth_usr_mdt) {
1902                         DEBUG_REQ(D_ERROR, req, "faked source MDT");
1903                         svc_rc = SECSVC_DROP;
1904                 }
1905                 break;
1906         case LUSTRE_SP_OST:
1907                 if (!req->rq_auth_usr_ost) {
1908                         DEBUG_REQ(D_ERROR, req, "faked source OST");
1909                         svc_rc = SECSVC_DROP;
1910                 }
1911                 break;
1912         case LUSTRE_SP_MGS:
1913         case LUSTRE_SP_MGC:
1914                 if (!req->rq_auth_usr_root && !req->rq_auth_usr_mdt &&
1915                     !req->rq_auth_usr_ost) {
1916                         DEBUG_REQ(D_ERROR, req, "faked source MGC/MGS");
1917                         svc_rc = SECSVC_DROP;
1918                 }
1919                 break;
1920         case LUSTRE_SP_ANY:
1921         default:
1922                 DEBUG_REQ(D_ERROR, req, "invalid source %u", req->rq_sp_from);
1923                 svc_rc = SECSVC_DROP;
1924         }
1925
1926         return svc_rc;
1927 }
1928
1929 /**
1930  * Used by ptlrpc server to perform transformation upon the request message of
1931  * the incoming \a req. This must be the first thing done with an incoming
1932  * request in the ptlrpc layer.
1933  *
1934  * \retval SECSVC_OK success, and req->rq_reqmsg points to the request message
1935  * in clear text, of size req->rq_reqlen; also req->rq_svc_ctx is set.
1936  * \retval SECSVC_COMPLETE success, the request has been fully processed, and
1937  * the reply message has been prepared.
1938  * \retval SECSVC_DROP failed, this request should be dropped.
1939  */
1940 int sptlrpc_svc_unwrap_request(struct ptlrpc_request *req)
1941 {
1942         struct ptlrpc_sec_policy *policy;
1943         struct lustre_msg *msg = req->rq_reqbuf;
1944         int rc;
1945
1946         LASSERT(msg);
1947         LASSERT(!req->rq_reqmsg);
1948         LASSERT(!req->rq_repmsg);
1949         LASSERT(!req->rq_svc_ctx);
1950
1951         req->rq_req_swab_mask = 0;
1952
1953         rc = __lustre_unpack_msg(msg, req->rq_reqdata_len);
1954         switch (rc) {
1955         case 1:
1956                 lustre_set_req_swabbed(req, MSG_PTLRPC_HEADER_OFF);
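                /* fall through: rc == 1 only means the header was swabbed */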
1957         case 0:
1958                 break;
1959         default:
1960                 CERROR("error unpacking request from %s x%llu\n",
1961                        libcfs_id2str(req->rq_peer), req->rq_xid);
1962                 return SECSVC_DROP;
1963         }
1964
1965         req->rq_flvr.sf_rpc = WIRE_FLVR(msg->lm_secflvr);
1966         req->rq_sp_from = LUSTRE_SP_ANY;
1967         req->rq_auth_uid = -1;
1968         req->rq_auth_mapped_uid = -1;
1969
1970         policy = sptlrpc_wireflavor2policy(req->rq_flvr.sf_rpc);
1971         if (!policy) {
1972                 CERROR("unsupported rpc flavor %x\n", req->rq_flvr.sf_rpc);
1973                 return SECSVC_DROP;
1974         }
1975
1976         LASSERT(policy->sp_sops->accept);
1977         rc = policy->sp_sops->accept(req);
1978         sptlrpc_policy_put(policy);
1979         LASSERT(req->rq_reqmsg || rc != SECSVC_OK);
1980         LASSERT(req->rq_svc_ctx || rc == SECSVC_DROP);
1981
1982         /*
1983          * if it's not the null flavor (i.e. the real msg is embedded/packed),
1984          * reset the swab mask for the coming inner msg unpacking.
1985          */
1986         if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL)
1987                 req->rq_req_swab_mask = 0;
1988
1989         /* sanity check for the request source */
1990         rc = sptlrpc_svc_check_from(req, rc);
1991         return rc;
1992 }
1993
1994 /**
1995  * Used by ptlrpc server to allocate the reply buffer for \a req. On success,
1996  * req->rq_reply_state is set, and req->rq_reply_state->rs_msg points to
1997  * a buffer of \a msglen size.
1998  */
1999 int sptlrpc_svc_alloc_rs(struct ptlrpc_request *req, int msglen)
2000 {
2001         struct ptlrpc_sec_policy *policy;
2002         struct ptlrpc_reply_state *rs;
2003         int rc;
2004
2005         LASSERT(req->rq_svc_ctx);
2006         LASSERT(req->rq_svc_ctx->sc_policy);
2007
2008         policy = req->rq_svc_ctx->sc_policy;
2009         LASSERT(policy->sp_sops->alloc_rs);
2010
2011         rc = policy->sp_sops->alloc_rs(req, msglen);
2012         if (unlikely(rc == -ENOMEM)) {
2013                 struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
2014
2015                 if (svcpt->scp_service->srv_max_reply_size <
2016                    msglen + sizeof(struct ptlrpc_reply_state)) {
2017                         /* Just return failure if the size is too big */
2018                         CERROR("size of message is too big (%zd), %d allowed\n",
2019                                msglen + sizeof(struct ptlrpc_reply_state),
2020                                svcpt->scp_service->srv_max_reply_size);
2021                         return -ENOMEM;
2022                 }
2023
2024                 /* failed alloc, try emergency pool */
2025                 rs = lustre_get_emerg_rs(svcpt);
2026                 if (!rs)
2027                         return -ENOMEM;
2028
2029                 req->rq_reply_state = rs;
2030                 rc = policy->sp_sops->alloc_rs(req, msglen);
2031                 if (rc) {
2032                         lustre_put_emerg_rs(rs);
2033                         req->rq_reply_state = NULL;
2034                 }
2035         }
2036
2037         LASSERT(rc != 0 ||
2038                 (req->rq_reply_state && req->rq_reply_state->rs_msg));
2039
2040         return rc;
2041 }
2042
2043 /**
2044  * Used by ptlrpc server to perform transformation upon the reply message.
2045  *
2046  * \post req->rq_reply_off is set to the appropriate server-controlled reply offset.
2047  * \post req->rq_repmsg and req->rq_reply_state->rs_msg become inaccessible.
2048  */
2049 int sptlrpc_svc_wrap_reply(struct ptlrpc_request *req)
2050 {
2051         struct ptlrpc_sec_policy *policy;
2052         int rc;
2053
2054         LASSERT(req->rq_svc_ctx);
2055         LASSERT(req->rq_svc_ctx->sc_policy);
2056
2057         policy = req->rq_svc_ctx->sc_policy;
2058         LASSERT(policy->sp_sops->authorize);
2059
2060         rc = policy->sp_sops->authorize(req);
2061         LASSERT(rc || req->rq_reply_state->rs_repdata_len);
2062
2063         return rc;
2064 }
2065
2066 /**
2067  * Used by ptlrpc server to free the reply_state.
2068  */
2069 void sptlrpc_svc_free_rs(struct ptlrpc_reply_state *rs)
2070 {
2071         struct ptlrpc_sec_policy *policy;
2072         unsigned int prealloc;
2073
2074         LASSERT(rs->rs_svc_ctx);
2075         LASSERT(rs->rs_svc_ctx->sc_policy);
2076
2077         policy = rs->rs_svc_ctx->sc_policy;
2078         LASSERT(policy->sp_sops->free_rs);
2079
2080         prealloc = rs->rs_prealloc;
2081         policy->sp_sops->free_rs(rs);
2082
2083         if (prealloc)
2084                 lustre_put_emerg_rs(rs);
2085 }
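/*
 * Editor's note: an illustrative sketch, not part of the original code, of
 * the order in which the ptlrpc service code is expected to drive the
 * server-side helpers above for one incoming request; the service loop and
 * the actual reply send are assumed and abbreviated.
 *
 *	rc = sptlrpc_svc_unwrap_request(req);
 *	if (rc != SECSVC_OK)
 *		... drop the request or send the prepared reply ...
 *	rc = sptlrpc_svc_alloc_rs(req, msglen);   (sets req->rq_reply_state)
 *	... the handler fills req->rq_reply_state->rs_msg ...
 *	rc = sptlrpc_svc_wrap_reply(req);
 *	... the reply is sent, then the reply state is released ...
 *	sptlrpc_svc_free_rs(req->rq_reply_state);
 */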
2086
2087 void sptlrpc_svc_ctx_addref(struct ptlrpc_request *req)
2088 {
2089         struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
2090
2091         if (ctx)
2092                 atomic_inc(&ctx->sc_refcount);
2093 }
2094
2095 void sptlrpc_svc_ctx_decref(struct ptlrpc_request *req)
2096 {
2097         struct ptlrpc_svc_ctx *ctx = req->rq_svc_ctx;
2098
2099         if (!ctx)
2100                 return;
2101
2102         LASSERT_ATOMIC_POS(&ctx->sc_refcount);
2103         if (atomic_dec_and_test(&ctx->sc_refcount)) {
2104                 if (ctx->sc_policy->sp_sops->free_ctx)
2105                         ctx->sc_policy->sp_sops->free_ctx(ctx);
2106         }
2107         req->rq_svc_ctx = NULL;
2108 }
2109
2110 /****************************************
2111  * bulk security                        *
2112  ****************************************/
2113
2114 /**
2115  * Perform transformation upon the bulk data pointed to by \a desc. This is called
2116  * before transforming the request message.
2117  */
2118 int sptlrpc_cli_wrap_bulk(struct ptlrpc_request *req,
2119                           struct ptlrpc_bulk_desc *desc)
2120 {
2121         struct ptlrpc_cli_ctx *ctx;
2122
2123         LASSERT(req->rq_bulk_read || req->rq_bulk_write);
2124
2125         if (!req->rq_pack_bulk)
2126                 return 0;
2127
2128         ctx = req->rq_cli_ctx;
2129         if (ctx->cc_ops->wrap_bulk)
2130                 return ctx->cc_ops->wrap_bulk(ctx, req, desc);
2131         return 0;
2132 }
2133 EXPORT_SYMBOL(sptlrpc_cli_wrap_bulk);
2134
2135 /**
2136  * This is called after unwrapping the reply message.
2137  * Returns the number of bytes of actual plain text received, or an error code.
2138  */
2139 int sptlrpc_cli_unwrap_bulk_read(struct ptlrpc_request *req,
2140                                  struct ptlrpc_bulk_desc *desc,
2141                                  int nob)
2142 {
2143         struct ptlrpc_cli_ctx *ctx;
2144         int rc;
2145
2146         LASSERT(req->rq_bulk_read && !req->rq_bulk_write);
2147
2148         if (!req->rq_pack_bulk)
2149                 return desc->bd_nob_transferred;
2150
2151         ctx = req->rq_cli_ctx;
2152         if (ctx->cc_ops->unwrap_bulk) {
2153                 rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
2154                 if (rc < 0)
2155                         return rc;
2156         }
2157         return desc->bd_nob_transferred;
2158 }
2159 EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_read);
2160
2161 /**
2162  * This is called after unwrapping the reply message.
2163  * Returns 0 on success or an error code.
2164  */
2165 int sptlrpc_cli_unwrap_bulk_write(struct ptlrpc_request *req,
2166                                   struct ptlrpc_bulk_desc *desc)
2167 {
2168         struct ptlrpc_cli_ctx *ctx;
2169         int rc;
2170
2171         LASSERT(!req->rq_bulk_read && req->rq_bulk_write);
2172
2173         if (!req->rq_pack_bulk)
2174                 return 0;
2175
2176         ctx = req->rq_cli_ctx;
2177         if (ctx->cc_ops->unwrap_bulk) {
2178                 rc = ctx->cc_ops->unwrap_bulk(ctx, req, desc);
2179                 if (rc < 0)
2180                         return rc;
2181         }
2182
2183         /*
2184          * if everything went right, nob should equal nob_transferred.
2185          * in case of privacy mode, nob_transferred needs to be adjusted.
2186          */
2187         if (desc->bd_nob != desc->bd_nob_transferred) {
2188                 CERROR("nob %d doesn't match transferred nob %d\n",
2189                        desc->bd_nob, desc->bd_nob_transferred);
2190                 return -EPROTO;
2191         }
2192
2193         return 0;
2194 }
2195 EXPORT_SYMBOL(sptlrpc_cli_unwrap_bulk_write);
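/*
 * Editor's note: an illustrative sketch, not part of the original code, of
 * the client-side bulk helpers above.  They only do real work when the
 * request is flagged rq_pack_bulk, i.e. when the flavor protects bulk data
 * (see sptlrpc_flavor_has_bulk() below); otherwise they are effectively
 * no-ops.
 *
 *	rc = sptlrpc_cli_wrap_bulk(req, desc);	(before the request is sent)
 *	... request sent, reply received and unwrapped ...
 *	if (req->rq_bulk_read)
 *		nob = sptlrpc_cli_unwrap_bulk_read(req, desc, nob);
 *	else
 *		rc = sptlrpc_cli_unwrap_bulk_write(req, desc);
 */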
2196
2197 /****************************************
2198  * user descriptor helpers            *
2199  ****************************************/
2200
2201 int sptlrpc_current_user_desc_size(void)
2202 {
2203         int ngroups;
2204
2205         ngroups = current_ngroups;
2206
2207         if (ngroups > LUSTRE_MAX_GROUPS)
2208                 ngroups = LUSTRE_MAX_GROUPS;
2209         return sptlrpc_user_desc_size(ngroups);
2210 }
2211 EXPORT_SYMBOL(sptlrpc_current_user_desc_size);
2212
2213 int sptlrpc_pack_user_desc(struct lustre_msg *msg, int offset)
2214 {
2215         struct ptlrpc_user_desc *pud;
2216
2217         pud = lustre_msg_buf(msg, offset, 0);
2218
2219         if (!pud)
2220                 return -EINVAL;
2221
2222         pud->pud_uid = from_kuid(&init_user_ns, current_uid());
2223         pud->pud_gid = from_kgid(&init_user_ns, current_gid());
2224         pud->pud_fsuid = from_kuid(&init_user_ns, current_fsuid());
2225         pud->pud_fsgid = from_kgid(&init_user_ns, current_fsgid());
2226         pud->pud_cap = cfs_curproc_cap_pack();
2227         pud->pud_ngroups = (msg->lm_buflens[offset] - sizeof(*pud)) / 4;
2228
2229         task_lock(current);
2230         if (pud->pud_ngroups > current_ngroups)
2231                 pud->pud_ngroups = current_ngroups;
2232         memcpy(pud->pud_groups, current_cred()->group_info->gid,
2233                pud->pud_ngroups * sizeof(__u32));
2234         task_unlock(current);
2235
2236         return 0;
2237 }
2238 EXPORT_SYMBOL(sptlrpc_pack_user_desc);
2239
2240 int sptlrpc_unpack_user_desc(struct lustre_msg *msg, int offset, int swabbed)
2241 {
2242         struct ptlrpc_user_desc *pud;
2243         int i;
2244
2245         pud = lustre_msg_buf(msg, offset, sizeof(*pud));
2246         if (!pud)
2247                 return -EINVAL;
2248
2249         if (swabbed) {
2250                 __swab32s(&pud->pud_uid);
2251                 __swab32s(&pud->pud_gid);
2252                 __swab32s(&pud->pud_fsuid);
2253                 __swab32s(&pud->pud_fsgid);
2254                 __swab32s(&pud->pud_cap);
2255                 __swab32s(&pud->pud_ngroups);
2256         }
2257
2258         if (pud->pud_ngroups > LUSTRE_MAX_GROUPS) {
2259                 CERROR("%u groups is too large\n", pud->pud_ngroups);
2260                 return -EINVAL;
2261         }
2262
2263         if (sizeof(*pud) + pud->pud_ngroups * sizeof(__u32) >
2264             msg->lm_buflens[offset]) {
2265                 CERROR("%u groups are claimed but bufsize only %u\n",
2266                        pud->pud_ngroups, msg->lm_buflens[offset]);
2267                 return -EINVAL;
2268         }
2269
2270         if (swabbed) {
2271                 for (i = 0; i < pud->pud_ngroups; i++)
2272                         __swab32s(&pud->pud_groups[i]);
2273         }
2274
2275         return 0;
2276 }
2277 EXPORT_SYMBOL(sptlrpc_unpack_user_desc);
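/*
 * Editor's note: an illustrative sketch, not part of the original code, of
 * how the user descriptor helpers above fit together.  The segment index
 * "offset" and the buflens array are assumptions made for the example.
 *
 *	(client side: size, then fill, the descriptor segment)
 *	buflens[offset] = sptlrpc_current_user_desc_size();
 *	...
 *	rc = sptlrpc_pack_user_desc(msg, offset);
 *
 *	(server side: "swabbed" reflects whether the message was byte-swapped)
 *	rc = sptlrpc_unpack_user_desc(msg, offset, swabbed);
 */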
2278
2279 /****************************************
2280  * misc helpers                  *
2281  ****************************************/
2282
2283 const char *sec2target_str(struct ptlrpc_sec *sec)
2284 {
2285         if (!sec || !sec->ps_import || !sec->ps_import->imp_obd)
2286                 return "*";
2287         if (sec_is_reverse(sec))
2288                 return "c";
2289         return obd_uuid2str(&sec->ps_import->imp_obd->u.cli.cl_target_uuid);
2290 }
2291 EXPORT_SYMBOL(sec2target_str);
2292
2293 /*
2294  * return true if the bulk data is protected
2295  */
2296 bool sptlrpc_flavor_has_bulk(struct sptlrpc_flavor *flvr)
2297 {
2298         switch (SPTLRPC_FLVR_BULK_SVC(flvr->sf_rpc)) {
2299         case SPTLRPC_BULK_SVC_INTG:
2300         case SPTLRPC_BULK_SVC_PRIV:
2301                 return true;
2302         default:
2303                 return false;
2304         }
2305 }
2306 EXPORT_SYMBOL(sptlrpc_flavor_has_bulk);
2307
2308 /****************************************
2309  * crypto API helper/alloc blkcipher    *
2310  ****************************************/
2311
2312 /****************************************
2313  * initialize/finalize            *
2314  ****************************************/
2315
2316 int sptlrpc_init(void)
2317 {
2318         int rc;
2319
2320         rwlock_init(&policy_lock);
2321
2322         rc = sptlrpc_gc_init();
2323         if (rc)
2324                 goto out;
2325
2326         rc = sptlrpc_conf_init();
2327         if (rc)
2328                 goto out_gc;
2329
2330         rc = sptlrpc_enc_pool_init();
2331         if (rc)
2332                 goto out_conf;
2333
2334         rc = sptlrpc_null_init();
2335         if (rc)
2336                 goto out_pool;
2337
2338         rc = sptlrpc_plain_init();
2339         if (rc)
2340                 goto out_null;
2341
2342         rc = sptlrpc_lproc_init();
2343         if (rc)
2344                 goto out_plain;
2345
2346         return 0;
2347
2348 out_plain:
2349         sptlrpc_plain_fini();
2350 out_null:
2351         sptlrpc_null_fini();
2352 out_pool:
2353         sptlrpc_enc_pool_fini();
2354 out_conf:
2355         sptlrpc_conf_fini();
2356 out_gc:
2357         sptlrpc_gc_fini();
2358 out:
2359         return rc;
2360 }
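/*
 * Editor's note: an illustrative sketch, not part of the original code.  The
 * ptlrpc module init path is expected to call sptlrpc_init() once at startup
 * and sptlrpc_fini() below on shutdown; sptlrpc_fini() tears the sub-systems
 * down in the reverse order of initialization, mirroring the error-unwinding
 * goto ladder above.
 *
 *	rc = sptlrpc_init();
 *	if (rc)
 *		return rc;
 *	... rest of ptlrpc initialization ...
 *	(on shutdown)
 *	sptlrpc_fini();
 */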
2361
2362 void sptlrpc_fini(void)
2363 {
2364         sptlrpc_lproc_fini();
2365         sptlrpc_plain_fini();
2366         sptlrpc_null_fini();
2367         sptlrpc_enc_pool_fini();
2368         sptlrpc_conf_fini();
2369         sptlrpc_gc_fini();
2370 }