GNU Linux-libre 4.14.266-gnu1
[releases.git] / net / netfilter / nft_set_rbtree.c
1 /*
2  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * Development of this code funded by Astaro AG (http://www.astaro.com/)
9  */
10
11 #include <linux/kernel.h>
12 #include <linux/init.h>
13 #include <linux/module.h>
14 #include <linux/list.h>
15 #include <linux/rbtree.h>
16 #include <linux/netlink.h>
17 #include <linux/netfilter.h>
18 #include <linux/netfilter/nf_tables.h>
19 #include <net/netfilter/nf_tables.h>
20
21 struct nft_rbtree {
22         struct rb_root          root;
23         rwlock_t                lock;
24         seqcount_t              count;
25 };
26
27 struct nft_rbtree_elem {
28         struct rb_node          node;
29         struct nft_set_ext      ext;
30 };
31
32 static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
33 {
34         return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
35                (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
36 }
37
38 static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
39                              const struct nft_rbtree_elem *interval)
40 {
41         return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
42 }
43
44 static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
45                                 const u32 *key, const struct nft_set_ext **ext,
46                                 unsigned int seq)
47 {
48         struct nft_rbtree *priv = nft_set_priv(set);
49         const struct nft_rbtree_elem *rbe, *interval = NULL;
50         u8 genmask = nft_genmask_cur(net);
51         const struct rb_node *parent;
52         const void *this;
53         int d;
54
55         parent = rcu_dereference_raw(priv->root.rb_node);
56         while (parent != NULL) {
57                 if (read_seqcount_retry(&priv->count, seq))
58                         return false;
59
60                 rbe = rb_entry(parent, struct nft_rbtree_elem, node);
61
62                 this = nft_set_ext_key(&rbe->ext);
63                 d = memcmp(this, key, set->klen);
64                 if (d < 0) {
65                         parent = rcu_dereference_raw(parent->rb_left);
66                         if (interval &&
67                             nft_rbtree_equal(set, this, interval) &&
68                             nft_rbtree_interval_end(this) &&
69                             !nft_rbtree_interval_end(interval))
70                                 continue;
71                         interval = rbe;
72                 } else if (d > 0)
73                         parent = rcu_dereference_raw(parent->rb_right);
74                 else {
75                         if (!nft_set_elem_active(&rbe->ext, genmask)) {
76                                 parent = rcu_dereference_raw(parent->rb_left);
77                                 continue;
78                         }
79                         if (nft_rbtree_interval_end(rbe))
80                                 goto out;
81
82                         *ext = &rbe->ext;
83                         return true;
84                 }
85         }
86
87         if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
88             nft_set_elem_active(&interval->ext, genmask) &&
89             !nft_rbtree_interval_end(interval)) {
90                 *ext = &interval->ext;
91                 return true;
92         }
93 out:
94         return false;
95 }
96
97 static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
98                               const u32 *key, const struct nft_set_ext **ext)
99 {
100         struct nft_rbtree *priv = nft_set_priv(set);
101         unsigned int seq = read_seqcount_begin(&priv->count);
102         bool ret;
103
104         ret = __nft_rbtree_lookup(net, set, key, ext, seq);
105         if (ret || !read_seqcount_retry(&priv->count, seq))
106                 return ret;
107
108         read_lock_bh(&priv->lock);
109         seq = read_seqcount_begin(&priv->count);
110         ret = __nft_rbtree_lookup(net, set, key, ext, seq);
111         read_unlock_bh(&priv->lock);
112
113         return ret;
114 }
115
116 static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
117                                struct nft_rbtree_elem *new,
118                                struct nft_set_ext **ext)
119 {
120         struct nft_rbtree *priv = nft_set_priv(set);
121         u8 genmask = nft_genmask_next(net);
122         struct nft_rbtree_elem *rbe;
123         struct rb_node *parent, **p;
124         int d;
125
126         parent = NULL;
127         p = &priv->root.rb_node;
128         while (*p != NULL) {
129                 parent = *p;
130                 rbe = rb_entry(parent, struct nft_rbtree_elem, node);
131                 d = memcmp(nft_set_ext_key(&rbe->ext),
132                            nft_set_ext_key(&new->ext),
133                            set->klen);
134                 if (d < 0)
135                         p = &parent->rb_left;
136                 else if (d > 0)
137                         p = &parent->rb_right;
138                 else {
139                         if (nft_rbtree_interval_end(rbe) &&
140                             !nft_rbtree_interval_end(new)) {
141                                 p = &parent->rb_left;
142                         } else if (!nft_rbtree_interval_end(rbe) &&
143                                    nft_rbtree_interval_end(new)) {
144                                 p = &parent->rb_right;
145                         } else if (nft_set_elem_active(&rbe->ext, genmask)) {
146                                 *ext = &rbe->ext;
147                                 return -EEXIST;
148                         } else {
149                                 p = &parent->rb_left;
150                         }
151                 }
152         }
153         rb_link_node_rcu(&new->node, parent, p);
154         rb_insert_color(&new->node, &priv->root);
155         return 0;
156 }
157
158 static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
159                              const struct nft_set_elem *elem,
160                              struct nft_set_ext **ext)
161 {
162         struct nft_rbtree *priv = nft_set_priv(set);
163         struct nft_rbtree_elem *rbe = elem->priv;
164         int err;
165
166         write_lock_bh(&priv->lock);
167         write_seqcount_begin(&priv->count);
168         err = __nft_rbtree_insert(net, set, rbe, ext);
169         write_seqcount_end(&priv->count);
170         write_unlock_bh(&priv->lock);
171
172         return err;
173 }
174
175 static void nft_rbtree_remove(const struct net *net,
176                               const struct nft_set *set,
177                               const struct nft_set_elem *elem)
178 {
179         struct nft_rbtree *priv = nft_set_priv(set);
180         struct nft_rbtree_elem *rbe = elem->priv;
181
182         write_lock_bh(&priv->lock);
183         write_seqcount_begin(&priv->count);
184         rb_erase(&rbe->node, &priv->root);
185         write_seqcount_end(&priv->count);
186         write_unlock_bh(&priv->lock);
187 }
188
189 static void nft_rbtree_activate(const struct net *net,
190                                 const struct nft_set *set,
191                                 const struct nft_set_elem *elem)
192 {
193         struct nft_rbtree_elem *rbe = elem->priv;
194
195         nft_set_elem_change_active(net, set, &rbe->ext);
196 }
197
198 static bool nft_rbtree_flush(const struct net *net,
199                              const struct nft_set *set, void *priv)
200 {
201         struct nft_rbtree_elem *rbe = priv;
202
203         nft_set_elem_change_active(net, set, &rbe->ext);
204         return true;
205 }
206
207 static void *nft_rbtree_deactivate(const struct net *net,
208                                    const struct nft_set *set,
209                                    const struct nft_set_elem *elem)
210 {
211         const struct nft_rbtree *priv = nft_set_priv(set);
212         const struct rb_node *parent = priv->root.rb_node;
213         struct nft_rbtree_elem *rbe, *this = elem->priv;
214         u8 genmask = nft_genmask_next(net);
215         int d;
216
217         while (parent != NULL) {
218                 rbe = rb_entry(parent, struct nft_rbtree_elem, node);
219
220                 d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val,
221                                            set->klen);
222                 if (d < 0)
223                         parent = parent->rb_left;
224                 else if (d > 0)
225                         parent = parent->rb_right;
226                 else {
227                         if (nft_rbtree_interval_end(rbe) &&
228                             !nft_rbtree_interval_end(this)) {
229                                 parent = parent->rb_left;
230                                 continue;
231                         } else if (!nft_rbtree_interval_end(rbe) &&
232                                    nft_rbtree_interval_end(this)) {
233                                 parent = parent->rb_right;
234                                 continue;
235                         } else if (!nft_set_elem_active(&rbe->ext, genmask)) {
236                                 parent = parent->rb_left;
237                                 continue;
238                         }
239                         nft_rbtree_flush(net, set, rbe);
240                         return rbe;
241                 }
242         }
243         return NULL;
244 }
245
246 static void nft_rbtree_walk(const struct nft_ctx *ctx,
247                             struct nft_set *set,
248                             struct nft_set_iter *iter)
249 {
250         struct nft_rbtree *priv = nft_set_priv(set);
251         struct nft_rbtree_elem *rbe;
252         struct nft_set_elem elem;
253         struct rb_node *node;
254
255         read_lock_bh(&priv->lock);
256         for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
257                 rbe = rb_entry(node, struct nft_rbtree_elem, node);
258
259                 if (iter->count < iter->skip)
260                         goto cont;
261                 if (!nft_set_elem_active(&rbe->ext, iter->genmask))
262                         goto cont;
263
264                 elem.priv = rbe;
265
266                 iter->err = iter->fn(ctx, set, iter, &elem);
267                 if (iter->err < 0) {
268                         read_unlock_bh(&priv->lock);
269                         return;
270                 }
271 cont:
272                 iter->count++;
273         }
274         read_unlock_bh(&priv->lock);
275 }
276
277 static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[],
278                                         const struct nft_set_desc *desc)
279 {
280         return sizeof(struct nft_rbtree);
281 }
282
283 static int nft_rbtree_init(const struct nft_set *set,
284                            const struct nft_set_desc *desc,
285                            const struct nlattr * const nla[])
286 {
287         struct nft_rbtree *priv = nft_set_priv(set);
288
289         rwlock_init(&priv->lock);
290         seqcount_init(&priv->count);
291         priv->root = RB_ROOT;
292         return 0;
293 }
294
295 static void nft_rbtree_destroy(const struct nft_set *set)
296 {
297         struct nft_rbtree *priv = nft_set_priv(set);
298         struct nft_rbtree_elem *rbe;
299         struct rb_node *node;
300
301         while ((node = priv->root.rb_node) != NULL) {
302                 rb_erase(node, &priv->root);
303                 rbe = rb_entry(node, struct nft_rbtree_elem, node);
304                 nft_set_elem_destroy(set, rbe, true);
305         }
306 }
307
308 static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
309                                 struct nft_set_estimate *est)
310 {
311         if (desc->size)
312                 est->size = sizeof(struct nft_rbtree) +
313                             desc->size * sizeof(struct nft_rbtree_elem);
314         else
315                 est->size = ~0;
316
317         est->lookup = NFT_SET_CLASS_O_LOG_N;
318         est->space  = NFT_SET_CLASS_O_N;
319
320         return true;
321 }
322
323 static struct nft_set_type nft_rbtree_type;
324 static struct nft_set_ops nft_rbtree_ops __read_mostly = {
325         .type           = &nft_rbtree_type,
326         .privsize       = nft_rbtree_privsize,
327         .elemsize       = offsetof(struct nft_rbtree_elem, ext),
328         .estimate       = nft_rbtree_estimate,
329         .init           = nft_rbtree_init,
330         .destroy        = nft_rbtree_destroy,
331         .insert         = nft_rbtree_insert,
332         .remove         = nft_rbtree_remove,
333         .deactivate     = nft_rbtree_deactivate,
334         .flush          = nft_rbtree_flush,
335         .activate       = nft_rbtree_activate,
336         .lookup         = nft_rbtree_lookup,
337         .walk           = nft_rbtree_walk,
338         .features       = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
339 };
340
341 static struct nft_set_type nft_rbtree_type __read_mostly = {
342         .ops            = &nft_rbtree_ops,
343         .owner          = THIS_MODULE,
344 };
345
346 static int __init nft_rbtree_module_init(void)
347 {
348         return nft_register_set(&nft_rbtree_type);
349 }
350
351 static void __exit nft_rbtree_module_exit(void)
352 {
353         nft_unregister_set(&nft_rbtree_type);
354 }
355
356 module_init(nft_rbtree_module_init);
357 module_exit(nft_rbtree_module_exit);
358
359 MODULE_LICENSE("GPL");
360 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
361 MODULE_ALIAS_NFT_SET();