GNU Linux-libre 4.9.337-gnu1
[releases.git] / net / ipv4 / xfrm4_policy.c
1 /*
2  * xfrm4_policy.c
3  *
4  * Changes:
5  *      Kazunori MIYAZAWA @USAGI
6  *      YOSHIFUJI Hideaki @USAGI
7  *              Split up af-specific portion
8  *
9  */
10
11 #include <linux/err.h>
12 #include <linux/kernel.h>
13 #include <linux/inetdevice.h>
14 #include <linux/if_tunnel.h>
15 #include <net/dst.h>
16 #include <net/xfrm.h>
17 #include <net/ip.h>
18 #include <net/l3mdev.h>
19 #include <net/inet_ecn.h>
20
21 static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
22
23 static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
24                                             int tos, int oif,
25                                             const xfrm_address_t *saddr,
26                                             const xfrm_address_t *daddr)
27 {
28         struct rtable *rt;
29
30         memset(fl4, 0, sizeof(*fl4));
31         fl4->daddr = daddr->a4;
32         fl4->flowi4_tos = tos;
33         fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif);
34         if (saddr)
35                 fl4->saddr = saddr->a4;
36
37         fl4->flowi4_flags = FLOWI_FLAG_SKIP_NH_OIF;
38
39         rt = __ip_route_output_key(net, fl4);
40         if (!IS_ERR(rt))
41                 return &rt->dst;
42
43         return ERR_CAST(rt);
44 }
45
46 static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif,
47                                           const xfrm_address_t *saddr,
48                                           const xfrm_address_t *daddr)
49 {
50         struct flowi4 fl4;
51
52         return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr);
53 }
54
55 static int xfrm4_get_saddr(struct net *net, int oif,
56                            xfrm_address_t *saddr, xfrm_address_t *daddr)
57 {
58         struct dst_entry *dst;
59         struct flowi4 fl4;
60
61         dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr);
62         if (IS_ERR(dst))
63                 return -EHOSTUNREACH;
64
65         saddr->a4 = fl4.saddr;
66         dst_release(dst);
67         return 0;
68 }
69
70 static int xfrm4_get_tos(const struct flowi *fl)
71 {
72         return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; /* Strip ECN bits */
73 }
74
/*
 * afinfo ->init_path hook. Nothing is done for IPv4: all arguments are
 * ignored and 0 is always returned.
 */
static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
                           int nfheader_len)
{
        return 0;
}
80
81 static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
82                           const struct flowi *fl)
83 {
84         struct rtable *rt = (struct rtable *)xdst->route;
85         const struct flowi4 *fl4 = &fl->u.ip4;
86
87         xdst->u.rt.rt_iif = fl4->flowi4_iif;
88
89         xdst->u.dst.dev = dev;
90         dev_hold(dev);
91
92         /* Sheit... I remember I did this right. Apparently,
93          * it was magically lost, so this code needs audit */
94         xdst->u.rt.rt_is_input = rt->rt_is_input;
95         xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
96                                               RTCF_LOCAL);
97         xdst->u.rt.rt_type = rt->rt_type;
98         xdst->u.rt.rt_gateway = rt->rt_gateway;
99         xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
100         xdst->u.rt.rt_pmtu = rt->rt_pmtu;
101         xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
102         xdst->u.rt.rt_table_id = rt->rt_table_id;
103         INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
104
105         return 0;
106 }
107
108 static void
109 _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
110 {
111         const struct iphdr *iph = ip_hdr(skb);
112         int ihl = iph->ihl;
113         u8 *xprth = skb_network_header(skb) + ihl * 4;
114         struct flowi4 *fl4 = &fl->u.ip4;
115         int oif = 0;
116
117         if (skb_dst(skb))
118                 oif = skb_dst(skb)->dev->ifindex;
119
120         memset(fl4, 0, sizeof(struct flowi4));
121         fl4->flowi4_mark = skb->mark;
122         fl4->flowi4_oif = reverse ? skb->skb_iif : oif;
123
124         fl4->flowi4_proto = iph->protocol;
125         fl4->daddr = reverse ? iph->saddr : iph->daddr;
126         fl4->saddr = reverse ? iph->daddr : iph->saddr;
127         fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK;
128
129         if (!ip_is_fragment(iph)) {
130                 switch (iph->protocol) {
131                 case IPPROTO_UDP:
132                 case IPPROTO_UDPLITE:
133                 case IPPROTO_TCP:
134                 case IPPROTO_SCTP:
135                 case IPPROTO_DCCP:
136                         if (xprth + 4 < skb->data ||
137                             pskb_may_pull(skb, xprth + 4 - skb->data)) {
138                                 __be16 *ports;
139
140                                 xprth = skb_network_header(skb) + ihl * 4;
141                                 ports = (__be16 *)xprth;
142
143                                 fl4->fl4_sport = ports[!!reverse];
144                                 fl4->fl4_dport = ports[!reverse];
145                         }
146                         break;
147
148                 case IPPROTO_ICMP:
149                         if (xprth + 2 < skb->data ||
150                             pskb_may_pull(skb, xprth + 2 - skb->data)) {
151                                 u8 *icmp;
152
153                                 xprth = skb_network_header(skb) + ihl * 4;
154                                 icmp = xprth;
155
156                                 fl4->fl4_icmp_type = icmp[0];
157                                 fl4->fl4_icmp_code = icmp[1];
158                         }
159                         break;
160
161                 case IPPROTO_ESP:
162                         if (xprth + 4 < skb->data ||
163                             pskb_may_pull(skb, xprth + 4 - skb->data)) {
164                                 __be32 *ehdr;
165
166                                 xprth = skb_network_header(skb) + ihl * 4;
167                                 ehdr = (__be32 *)xprth;
168
169                                 fl4->fl4_ipsec_spi = ehdr[0];
170                         }
171                         break;
172
173                 case IPPROTO_AH:
174                         if (xprth + 8 < skb->data ||
175                             pskb_may_pull(skb, xprth + 8 - skb->data)) {
176                                 __be32 *ah_hdr;
177
178                                 xprth = skb_network_header(skb) + ihl * 4;
179                                 ah_hdr = (__be32 *)xprth;
180
181                                 fl4->fl4_ipsec_spi = ah_hdr[1];
182                         }
183                         break;
184
185                 case IPPROTO_COMP:
186                         if (xprth + 4 < skb->data ||
187                             pskb_may_pull(skb, xprth + 4 - skb->data)) {
188                                 __be16 *ipcomp_hdr;
189
190                                 xprth = skb_network_header(skb) + ihl * 4;
191                                 ipcomp_hdr = (__be16 *)xprth;
192
193                                 fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
194                         }
195                         break;
196
197                 case IPPROTO_GRE:
198                         if (xprth + 12 < skb->data ||
199                             pskb_may_pull(skb, xprth + 12 - skb->data)) {
200                                 __be16 *greflags;
201                                 __be32 *gre_hdr;
202
203                                 xprth = skb_network_header(skb) + ihl * 4;
204                                 greflags = (__be16 *)xprth;
205                                 gre_hdr = (__be32 *)xprth;
206
207                                 if (greflags[0] & GRE_KEY) {
208                                         if (greflags[0] & GRE_CSUM)
209                                                 gre_hdr++;
210                                         fl4->fl4_gre_key = gre_hdr[1];
211                                 }
212                         }
213                         break;
214
215                 default:
216                         fl4->fl4_ipsec_spi = 0;
217                         break;
218                 }
219         }
220 }
221
222 static inline int xfrm4_garbage_collect(struct dst_ops *ops)
223 {
224         struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops);
225
226         xfrm4_policy_afinfo.garbage_collect(net);
227         return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
228 }
229
230 static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
231                               struct sk_buff *skb, u32 mtu)
232 {
233         struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
234         struct dst_entry *path = xdst->route;
235
236         path->ops->update_pmtu(path, sk, skb, mtu);
237 }
238
239 static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk,
240                            struct sk_buff *skb)
241 {
242         struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
243         struct dst_entry *path = xdst->route;
244
245         path->ops->redirect(path, sk, skb);
246 }
247
/*
 * dst_ops ->destroy hook: release the generic dst metrics first, then hand
 * the entry to xfrm_dst_destroy().
 */
static void xfrm4_dst_destroy(struct dst_entry *dst)
{
        dst_destroy_metrics_generic(dst);
        xfrm_dst_destroy((struct xfrm_dst *)dst);
}
256
/*
 * dst_ops ->ifdown hook: only acts when @unregister is non-zero, in which
 * case the work is delegated to xfrm_dst_ifdown().
 */
static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
                             int unregister)
{
        if (unregister)
                xfrm_dst_ifdown(dst, dev);
}
265
266 static struct dst_ops xfrm4_dst_ops_template = {
267         .family =               AF_INET,
268         .gc =                   xfrm4_garbage_collect,
269         .update_pmtu =          xfrm4_update_pmtu,
270         .redirect =             xfrm4_redirect,
271         .cow_metrics =          dst_cow_metrics_generic,
272         .destroy =              xfrm4_dst_destroy,
273         .ifdown =               xfrm4_dst_ifdown,
274         .local_out =            __ip_local_out,
275         .gc_thresh =            INT_MAX,
276 };
277
278 static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
279         .family =               AF_INET,
280         .dst_ops =              &xfrm4_dst_ops_template,
281         .dst_lookup =           xfrm4_dst_lookup,
282         .get_saddr =            xfrm4_get_saddr,
283         .decode_session =       _decode_session4,
284         .get_tos =              xfrm4_get_tos,
285         .init_path =            xfrm4_init_path,
286         .fill_dst =             xfrm4_fill_dst,
287         .blackhole_route =      ipv4_blackhole_route,
288 };
289
290 #ifdef CONFIG_SYSCTL
291 static struct ctl_table xfrm4_policy_table[] = {
292         {
293                 .procname       = "xfrm4_gc_thresh",
294                 .data           = &init_net.xfrm.xfrm4_dst_ops.gc_thresh,
295                 .maxlen         = sizeof(int),
296                 .mode           = 0644,
297                 .proc_handler   = proc_dointvec,
298         },
299         { }
300 };
301
302 static __net_init int xfrm4_net_sysctl_init(struct net *net)
303 {
304         struct ctl_table *table;
305         struct ctl_table_header *hdr;
306
307         table = xfrm4_policy_table;
308         if (!net_eq(net, &init_net)) {
309                 table = kmemdup(table, sizeof(xfrm4_policy_table), GFP_KERNEL);
310                 if (!table)
311                         goto err_alloc;
312
313                 table[0].data = &net->xfrm.xfrm4_dst_ops.gc_thresh;
314         }
315
316         hdr = register_net_sysctl(net, "net/ipv4", table);
317         if (!hdr)
318                 goto err_reg;
319
320         net->ipv4.xfrm4_hdr = hdr;
321         return 0;
322
323 err_reg:
324         if (!net_eq(net, &init_net))
325                 kfree(table);
326 err_alloc:
327         return -ENOMEM;
328 }
329
330 static __net_exit void xfrm4_net_sysctl_exit(struct net *net)
331 {
332         struct ctl_table *table;
333
334         if (!net->ipv4.xfrm4_hdr)
335                 return;
336
337         table = net->ipv4.xfrm4_hdr->ctl_table_arg;
338         unregister_net_sysctl_table(net->ipv4.xfrm4_hdr);
339         if (!net_eq(net, &init_net))
340                 kfree(table);
341 }
342 #else /* CONFIG_SYSCTL */
/* Stub used when CONFIG_SYSCTL is not defined: nothing to register. */
static inline int xfrm4_net_sysctl_init(struct net *net)
{
        return 0;
}
347
/* Stub used when CONFIG_SYSCTL is not defined: nothing to unregister. */
static inline void xfrm4_net_sysctl_exit(struct net *net)
{
}
351 #endif
352
353 static int __net_init xfrm4_net_init(struct net *net)
354 {
355         int ret;
356
357         memcpy(&net->xfrm.xfrm4_dst_ops, &xfrm4_dst_ops_template,
358                sizeof(xfrm4_dst_ops_template));
359         ret = dst_entries_init(&net->xfrm.xfrm4_dst_ops);
360         if (ret)
361                 return ret;
362
363         ret = xfrm4_net_sysctl_init(net);
364         if (ret)
365                 dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
366
367         return ret;
368 }
369
370 static void __net_exit xfrm4_net_exit(struct net *net)
371 {
372         xfrm4_net_sysctl_exit(net);
373         dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
374 }
375
376 static struct pernet_operations __net_initdata xfrm4_net_ops = {
377         .init   = xfrm4_net_init,
378         .exit   = xfrm4_net_exit,
379 };
380
381 static void __init xfrm4_policy_init(void)
382 {
383         xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
384 }
385
386 void __init xfrm4_init(void)
387 {
388         xfrm4_state_init();
389         xfrm4_policy_init();
390         xfrm4_protocol_init();
391         register_pernet_subsys(&xfrm4_net_ops);
392 }
393