Print this page
6659 nvlist_free(NULL) is a no-op
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/dev/sdev_ncache.c
+++ new/usr/src/uts/common/fs/dev/sdev_ncache.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * negative cache handling for the /dev fs
28 28 */
29 29
30 30 #include <sys/types.h>
31 31 #include <sys/param.h>
32 32 #include <sys/t_lock.h>
33 33 #include <sys/systm.h>
34 34 #include <sys/sysmacros.h>
35 35 #include <sys/user.h>
36 36 #include <sys/time.h>
37 37 #include <sys/vfs.h>
38 38 #include <sys/vnode.h>
39 39 #include <sys/file.h>
40 40 #include <sys/fcntl.h>
41 41 #include <sys/flock.h>
42 42 #include <sys/kmem.h>
43 43 #include <sys/uio.h>
44 44 #include <sys/errno.h>
45 45 #include <sys/stat.h>
46 46 #include <sys/cred.h>
47 47 #include <sys/cmn_err.h>
48 48 #include <sys/debug.h>
49 49 #include <sys/mode.h>
50 50 #include <sys/policy.h>
51 51 #include <fs/fs_subr.h>
52 52 #include <sys/mount.h>
53 53 #include <sys/fs/snode.h>
54 54 #include <sys/fs/dv_node.h>
55 55 #include <sys/fs/sdev_impl.h>
56 56 #include <sys/sunndi.h>
57 57 #include <sys/sunmdi.h>
58 58 #include <sys/ddi.h>
59 59 #include <sys/modctl.h>
60 60 #include <sys/devcache.h>
61 61
62 62
63 63 /*
64 64 * ncache is a negative cache of failed lookups. An entry
65 65 * is added after an attempt to configure a device by that
66 66 * name failed. An accumulation of these entries over time
67 67  * gives us a set of device names for which implicit reconfiguration
68 68 * does not need to be attempted. If a name is created matching
69 69 * an entry in ncache, that entry is removed, with the
70 70 * persistent store updated.
71 71 *
72 72 * Implicit reconfig is initiated for any name during lookup that
73 73 * can't be resolved from the backing store and that isn't
74 74 * present in the negative cache. This functionality is
75 75 * enabled during system startup once communication with devfsadm
76 76 * can be achieved. Since readdir is more general, implicit
77 77 * reconfig initiated by reading a directory isn't enabled until
78 78 * the system is more fully booted, at the time of the multi-user
79 79 * milestone, corresponding to init state 2.
80 80 *
81 81 * A maximum is imposed on the number of entries in the cache
82 82 * to limit some script going wild and as a defense against attack.
83 83 * The default limit is 64 and can be adjusted via sdev_nc_max_entries.
84 84 *
85 85  * Each entry also has an expiration count. When a name in the
86 86  * cache is looked up, its count is reset to the default. Subsequent boots will decrement
87 87 * the count if a name isn't referenced. This permits a once-only
88 88 * entry to eventually be removed over time.
89 89 *
90 90 * sdev_reconfig_delay implements a "debounce" of the timing beyond
91 91 * system available indication, providing what the filesystem considers
92 92 * to be the system-is-fully-booted state. This is provided to adjust
93 93 * the timing if some application startup is performing a readdir
94 94 * in /dev that initiates a troublesome implicit reconfig on every boot.
95 95 *
96 96 * sdev_nc_disable_reset can be used to disable clearing the negative cache
97 97 * on reconfig boot. The default is to clear the cache on reconfig boot.
98 98 * sdev_nc_disable can be used to disable the negative cache itself.
99 99 *
100 100 * sdev_reconfig_disable can be used to disable implicit reconfig.
101 101 * The default is that implicit reconfig is enabled.
102 102 */
103 103
104 104 /* tunables and defaults */
105 105 #define SDEV_NC_EXPIRECNT 4
106 106 #define SDEV_NC_MAX_ENTRIES 64
107 107 #define SEV_RECONFIG_DELAY 6 /* seconds */
108 108
109 109 /* tunables */
110 110 int sdev_nc_expirecnt = SDEV_NC_EXPIRECNT;
111 111 int sdev_nc_max_entries = SDEV_NC_MAX_ENTRIES;
112 112 int sdev_reconfig_delay = SEV_RECONFIG_DELAY;
113 113 int sdev_reconfig_verbose = 0;
114 114 int sdev_reconfig_disable = 0;
115 115 int sdev_nc_disable = 0;
116 116 int sdev_nc_disable_reset = 0;
117 117 int sdev_nc_verbose = 0;
118 118 int sdev_cache_read_disable = 0;
119 119 int sdev_cache_write_disable = 0;
120 120
121 121 /* globals */
122 122 int sdev_boot_state = SDEV_BOOT_STATE_INITIAL;
123 123 int sdev_reconfig_boot = 0;
124 124 sdev_nc_list_t *sdev_ncache;
125 125 static nvf_handle_t sdevfd_handle;
126 126
/* static prototypes */
static void sdev_ncache_write_complete(nvf_handle_t);
static void sdev_ncache_write(void);
static void sdev_ncache_process_store(void);
static sdev_nc_list_t *sdev_nc_newlist(void);
static void sdev_nc_free_unlinked_node(sdev_nc_node_t *);
static sdev_nc_node_t *sdev_nc_findpath(sdev_nc_list_t *, char *);
static void sdev_nc_insertnode(sdev_nc_list_t *, sdev_nc_node_t *);
static void sdev_nc_free_bootonly(void);
static int sdev_ncache_unpack_nvlist(nvf_handle_t, nvlist_t *, char *);
static int sdev_ncache_pack_list(nvf_handle_t, nvlist_t **);
static void sdev_ncache_list_free(nvf_handle_t);
static void sdev_nvp_free(nvp_devname_t *);

/*
 * Registration for /etc/devices/devname_cache.
 * These callbacks are invoked by the nvf/nvpflush framework to
 * read, write and tear down the persistent negative-cache file.
 */
static nvf_ops_t sdev_cache_ops = {
	"/etc/devices/devname_cache",	/* path to cache */
	sdev_ncache_unpack_nvlist,	/* read: unpack nvlist */
	sdev_ncache_pack_list,		/* write: pack list */
	sdev_ncache_list_free,		/* free data list */
	sdev_ncache_write_complete	/* write complete callback */
};
151 151
152 152 /*
153 153 * called once at filesystem initialization
154 154 */
void
sdev_ncache_init(void)
{
	/* Allocate the (initially empty) in-core negative cache list. */
	sdev_ncache = sdev_nc_newlist();
}
160 160
161 161 /*
162 162 * called at mount of the global instance
163 163 * currently the global instance is never unmounted
164 164 */
void
sdev_ncache_setup(void)
{
	/* Register the devname_cache file with the nvpflush framework. */
	sdevfd_handle = nvf_register_file(&sdev_cache_ops);
	ASSERT(sdevfd_handle);

	list_create(nvf_list(sdevfd_handle), sizeof (nvp_devname_t),
	    offsetof(nvp_devname_t, nvp_link));

	/*
	 * Read the backing store (best-effort: a read failure just
	 * leaves the cache empty) and seed the in-core negative cache
	 * from whatever was unpacked.
	 */
	rw_enter(nvf_lock(sdevfd_handle), RW_WRITER);
	if (!sdev_cache_read_disable) {
		(void) nvf_read_file(sdevfd_handle);
	}
	sdev_ncache_process_store();
	rw_exit(nvf_lock(sdevfd_handle));

	/* Pick up any boot-state transitions that happened before mount. */
	sdev_devstate_change();
}
183 183
184 184 static void
185 185 sdev_nvp_free(nvp_devname_t *dp)
186 186 {
187 187 int i;
188 188 char **p;
189 189
190 190 if (dp->nvp_npaths > 0) {
191 191 p = dp->nvp_paths;
192 192 for (i = 0; i < dp->nvp_npaths; i++, p++) {
193 193 kmem_free(*p, strlen(*p)+1);
194 194 }
195 195 kmem_free(dp->nvp_paths,
196 196 dp->nvp_npaths * sizeof (char *));
197 197 kmem_free(dp->nvp_expirecnts,
198 198 dp->nvp_npaths * sizeof (int));
199 199 }
200 200
201 201 kmem_free(dp, sizeof (nvp_devname_t));
202 202 }
203 203
/*
 * Free the nvf data list for the devname cache.  Only the head entry
 * is freed: the list holds at most one nvp_devname_t (see the ASSERT
 * in sdev_ncache_pack_list), so removing the head empties it.
 */
static void
sdev_ncache_list_free(nvf_handle_t fd)
{
	list_t *listp;
	nvp_devname_t *dp;

	ASSERT(fd == sdevfd_handle);
	ASSERT(RW_WRITE_HELD(nvf_lock(fd)));

	listp = nvf_list(fd);
	if ((dp = list_head(listp)) != NULL) {
		list_remove(listp, dp);
		sdev_nvp_free(dp);
	}
}
219 219
220 220 /*
221 221 * Unpack a device path/nvlist pair to internal data list format.
222 222 * Used to decode the nvlist format into the internal representation
223 223 * when reading /etc/devices/devname_cache.
224 224 * Note that the expiration counts are optional, for compatibility
225 225 * with earlier instances of the cache. If not present, the
226 226 * expire counts are initialized to defaults.
227 227 */
static int
sdev_ncache_unpack_nvlist(nvf_handle_t fd, nvlist_t *nvl, char *name)
{
	nvp_devname_t *np;
	char	**strs;
	int	*cnts;
	uint_t	nstrs, ncnts;
	int	rval, i;

	ASSERT(fd == sdevfd_handle);
	ASSERT(RW_WRITE_HELD(nvf_lock(fd)));

	/* name of the sublist must match what we created */
	if (strcmp(name, DP_DEVNAME_ID) != 0) {
		return (-1);
	}

	np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP);

	/* The path array is mandatory; without it the sublist is invalid. */
	rval = nvlist_lookup_string_array(nvl,
	    DP_DEVNAME_NCACHE_ID, &strs, &nstrs);
	if (rval) {
		kmem_free(np, sizeof (nvp_devname_t));
		return (-1);
	}

	/* Copy the paths out of the nvlist into our own allocations. */
	np->nvp_npaths = nstrs;
	np->nvp_paths = kmem_zalloc(nstrs * sizeof (char *), KM_SLEEP);
	for (i = 0; i < nstrs; i++) {
		np->nvp_paths[i] = i_ddi_strdup(strs[i], KM_SLEEP);
	}
	/* Default every expire count in case the file predates them. */
	np->nvp_expirecnts = kmem_zalloc(nstrs * sizeof (int), KM_SLEEP);
	for (i = 0; i < nstrs; i++) {
		np->nvp_expirecnts[i] = sdev_nc_expirecnt;
	}

	/*
	 * Expire counts are optional (older cache files lack them).
	 * min() guards against a malformed file where the two arrays
	 * disagree in length; the ASSERT flags that case on DEBUG.
	 */
	rval = nvlist_lookup_int32_array(nvl,
	    DP_DEVNAME_NC_EXPIRECNT_ID, &cnts, &ncnts);
	if (rval == 0) {
		ASSERT(ncnts == nstrs);
		ncnts = min(ncnts, nstrs);
		for (i = 0; i < nstrs; i++) {
			np->nvp_expirecnts[i] = cnts[i];
		}
	}

	list_insert_tail(nvf_list(sdevfd_handle), np);

	return (0);
}
278 278
279 279 /*
280 280 * Pack internal format cache data to a single nvlist.
281 281 * Used when writing the nvlist file.
282 282 * Note this is called indirectly by the nvpflush daemon.
283 283 */
static int
sdev_ncache_pack_list(nvf_handle_t fd, nvlist_t **ret_nvl)
{
	nvlist_t *nvl, *sub_nvl;
	nvp_devname_t *np;
	int rval;
	list_t *listp;

	ASSERT(fd == sdevfd_handle);
	ASSERT(RW_WRITE_HELD(nvf_lock(fd)));

	rval = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
	if (rval != 0) {
		nvf_error("%s: nvlist alloc error %d\n",
		    nvf_cache_name(fd), rval);
		return (DDI_FAILURE);
	}

	/* The data list holds at most one entry carrying all the paths. */
	listp = nvf_list(sdevfd_handle);
	if ((np = list_head(listp)) != NULL) {
		ASSERT(list_next(listp, np) == NULL);

		rval = nvlist_alloc(&sub_nvl, NV_UNIQUE_NAME, KM_SLEEP);
		if (rval != 0) {
			nvf_error("%s: nvlist alloc error %d\n",
			    nvf_cache_name(fd), rval);
			sub_nvl = NULL;	/* so the err path frees nothing */
			goto err;
		}

		rval = nvlist_add_string_array(sub_nvl,
		    DP_DEVNAME_NCACHE_ID, np->nvp_paths, np->nvp_npaths);
		if (rval != 0) {
			nvf_error("%s: nvlist add error %d (sdev)\n",
			    nvf_cache_name(fd), rval);
			goto err;
		}

		rval = nvlist_add_int32_array(sub_nvl,
		    DP_DEVNAME_NC_EXPIRECNT_ID,
		    np->nvp_expirecnts, np->nvp_npaths);
		if (rval != 0) {
			nvf_error("%s: nvlist add error %d (sdev)\n",
			    nvf_cache_name(fd), rval);
			goto err;
		}

		/* nvlist_add_nvlist copies sub_nvl, so free our copy below */
		rval = nvlist_add_nvlist(nvl, DP_DEVNAME_ID, sub_nvl);
		if (rval != 0) {
			nvf_error("%s: nvlist add error %d (sublist)\n",
			    nvf_cache_name(fd), rval);
			goto err;
		}
		nvlist_free(sub_nvl);
	}

	*ret_nvl = nvl;
	return (DDI_SUCCESS);

err:
	/* nvlist_free(NULL) is a no-op, so no need to guard sub_nvl */
	nvlist_free(sub_nvl);
	nvlist_free(nvl);
	*ret_nvl = NULL;
	return (DDI_FAILURE);
}
350 349
351 350 /*
352 351 * Run through the data read from the backing cache store
353 352 * to establish the initial state of the neg. cache.
354 353 */
static void
sdev_ncache_process_store(void)
{
	sdev_nc_list_t *ncl = sdev_ncache;
	nvp_devname_t *np;
	sdev_nc_node_t *lp;
	char *path;
	int i, n;
	list_t *listp;

	if (sdev_nc_disable)
		return;

	ASSERT(RW_WRITE_HELD(nvf_lock(sdevfd_handle)));

	/*
	 * Turn each persisted path into an in-core ncache node, marked
	 * NCN_SRC_STORE so a later write won't re-dirty the list for
	 * entries that came from the backing store unchanged.  Entries
	 * beyond sdev_nc_max_entries are dropped (truncated).
	 */
	listp = nvf_list(sdevfd_handle);
	for (np = list_head(listp); np; np = list_next(listp, np)) {
		for (i = 0; i < np->nvp_npaths; i++) {
			sdcmn_err5((" %s %d\n",
			    np->nvp_paths[i], np->nvp_expirecnts[i]));
			if (ncl->ncl_nentries < sdev_nc_max_entries) {
				path = np->nvp_paths[i];
				n = strlen(path) + 1;
				lp = kmem_alloc(sizeof (sdev_nc_node_t),
				    KM_SLEEP);
				lp->ncn_name = kmem_alloc(n, KM_SLEEP);
				bcopy(path, lp->ncn_name, n);
				lp->ncn_flags = NCN_SRC_STORE;
				lp->ncn_expirecnt = np->nvp_expirecnts[i];
				sdev_nc_insertnode(ncl, lp);
			} else if (sdev_nc_verbose) {
				cmn_err(CE_CONT,
				    "?%s: truncating from ncache (max %d)\n",
				    np->nvp_paths[i], sdev_nc_max_entries);
			}
		}
	}
}
393 392
394 393 /*
395 394 * called by nvpflush daemon to inform us that an update of
396 395 * the cache file has been completed.
397 396 */
static void
sdev_ncache_write_complete(nvf_handle_t fd)
{
	sdev_nc_list_t *ncl = sdev_ncache;

	ASSERT(fd == sdevfd_handle);

	mutex_enter(&ncl->ncl_mutex);

	ASSERT(ncl->ncl_flags & NCL_LIST_WRITING);

	if (ncl->ncl_flags & NCL_LIST_DIRTY) {
		/*
		 * The list was modified while the write was in flight;
		 * keep NCL_LIST_WRITING set and kick off another write
		 * to pick up the newer state.
		 */
		sdcmn_err5(("ncache write complete but dirty again\n"));
		ncl->ncl_flags &= ~NCL_LIST_DIRTY;
		mutex_exit(&ncl->ncl_mutex);
		sdev_ncache_write();
	} else {
		/* Done writing; free the snapshot handed to nvpflush. */
		sdcmn_err5(("ncache write complete\n"));
		ncl->ncl_flags &= ~NCL_LIST_WRITING;
		mutex_exit(&ncl->ncl_mutex);
		rw_enter(nvf_lock(fd), RW_WRITER);
		sdev_ncache_list_free(fd);
		rw_exit(nvf_lock(fd));
	}
}
423 422
424 423 /*
425 424 * Prepare to perform an update of the neg. cache backing store.
426 425 */
static void
sdev_ncache_write(void)
{
	sdev_nc_list_t *ncl = sdev_ncache;
	nvp_devname_t *np;
	sdev_nc_node_t *lp;
	int n, i;

	if (sdev_cache_write_disable) {
		/* Caller set NCL_LIST_WRITING; undo it since we won't. */
		mutex_enter(&ncl->ncl_mutex);
		ncl->ncl_flags &= ~NCL_LIST_WRITING;
		mutex_exit(&ncl->ncl_mutex);
		return;
	}

	/* proper lock ordering here is essential: nvf lock before ncl_lock */
	rw_enter(nvf_lock(sdevfd_handle), RW_WRITER);
	sdev_ncache_list_free(sdevfd_handle);

	/* Snapshot the in-core cache into a single nvp_devname_t. */
	rw_enter(&ncl->ncl_lock, RW_READER);
	n = ncl->ncl_nentries;
	ASSERT(n <= sdev_nc_max_entries);

	np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP);
	np->nvp_npaths = n;
	np->nvp_paths = kmem_zalloc(n * sizeof (char *), KM_SLEEP);
	np->nvp_expirecnts = kmem_zalloc(n * sizeof (int), KM_SLEEP);

	i = 0;
	for (lp = list_head(&ncl->ncl_list); lp;
	    lp = list_next(&ncl->ncl_list, lp)) {
		np->nvp_paths[i] = i_ddi_strdup(lp->ncn_name, KM_SLEEP);
		np->nvp_expirecnts[i] = lp->ncn_expirecnt;
		sdcmn_err5((" %s %d\n",
		    np->nvp_paths[i], np->nvp_expirecnts[i]));
		i++;
	}

	rw_exit(&ncl->ncl_lock);

	/* Queue the snapshot and let the nvpflush daemon persist it. */
	nvf_mark_dirty(sdevfd_handle);
	list_insert_tail(nvf_list(sdevfd_handle), np);
	rw_exit(nvf_lock(sdevfd_handle));

	nvf_wake_daemon();
}
473 472
/*
 * Start a backing-store update if the list is dirty, writing is
 * enabled (NCL_LIST_WENABLE) and no write is already in progress.
 */
static void
sdev_nc_flush_updates(void)
{
	sdev_nc_list_t *ncl = sdev_ncache;

	if (sdev_nc_disable || sdev_cache_write_disable)
		return;

	mutex_enter(&ncl->ncl_mutex);
	if (((ncl->ncl_flags &
	    (NCL_LIST_DIRTY | NCL_LIST_WENABLE | NCL_LIST_WRITING)) ==
	    (NCL_LIST_DIRTY | NCL_LIST_WENABLE))) {
		ncl->ncl_flags &= ~NCL_LIST_DIRTY;
		ncl->ncl_flags |= NCL_LIST_WRITING;
		mutex_exit(&ncl->ncl_mutex);
		sdev_ncache_write();
	} else {
		mutex_exit(&ncl->ncl_mutex);
	}
}
494 493
/*
 * Flush updates once boot has progressed past the initial state and
 * writing has been enabled; otherwise do nothing (the flush will
 * happen later, when the write-enable transition occurs).
 */
static void
sdev_nc_flush_boot_update(void)
{
	sdev_nc_list_t *ncl = sdev_ncache;

	if (sdev_nc_disable || sdev_cache_write_disable ||
	    (sdev_boot_state == SDEV_BOOT_STATE_INITIAL)) {
		return;
	}
	mutex_enter(&ncl->ncl_mutex);
	if (ncl->ncl_flags & NCL_LIST_WENABLE) {
		mutex_exit(&ncl->ncl_mutex);
		sdev_nc_flush_updates();
	} else {
		mutex_exit(&ncl->ncl_mutex);
	}
}
513 512
static void
sdev_state_boot_complete()
{
	sdev_nc_list_t *ncl = sdev_ncache;
	sdev_nc_node_t *lp, *next;

	/*
	 * Once boot is complete, decrement the expire count of each entry
	 * in the cache not touched by a reference. Remove any that
	 * goes to zero. This effectively removes random entries over
	 * time.
	 */
	rw_enter(&ncl->ncl_lock, RW_WRITER);
	mutex_enter(&ncl->ncl_mutex);

	for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
		next = list_next(&ncl->ncl_list, lp);
		if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0) {
			if (lp->ncn_flags & NCN_ACTIVE) {
				/* referenced this boot: reset to full count */
				if (lp->ncn_expirecnt != sdev_nc_expirecnt) {
					lp->ncn_expirecnt = sdev_nc_expirecnt;
					ncl->ncl_flags |= NCL_LIST_DIRTY;
				}
			} else {
				/* unreferenced: age it, dropping at zero */
				if (--lp->ncn_expirecnt == 0) {
					list_remove(&ncl->ncl_list, lp);
					sdev_nc_free_unlinked_node(lp);
					ncl->ncl_nentries--;
				}
				ncl->ncl_flags |= NCL_LIST_DIRTY;
			}
		}
	}

	mutex_exit(&ncl->ncl_mutex);
	rw_exit(&ncl->ncl_lock);

	sdev_nc_flush_boot_update();
	sdev_boot_state = SDEV_BOOT_STATE_COMPLETE;
}
554 553
555 554 /*
556 555 * Upon transition to the login state on a reconfigure boot,
557 556 * a debounce timer is set up so that we cache all the nonsense
558 557 * lookups we're hit with by the windowing system startup.
559 558 */
560 559
/*ARGSUSED*/
static void
sdev_state_timeout(void *arg)
{
	/* debounce timer fired: declare boot complete */
	sdev_state_boot_complete();
}
567 566
/*
 * System-available transition: enable cache writes, then either go
 * straight to boot-complete or arm the debounce timeout (see the
 * comment above sdev_state_timeout).
 */
static void
sdev_state_sysavail()
{
	sdev_nc_list_t *ncl = sdev_ncache;
	clock_t nticks;
	int nsecs;

	mutex_enter(&ncl->ncl_mutex);
	ncl->ncl_flags |= NCL_LIST_WENABLE;
	mutex_exit(&ncl->ncl_mutex);

	nsecs = sdev_reconfig_delay;
	if (nsecs == 0) {
		sdev_state_boot_complete();
	} else {
		nticks = drv_usectohz(1000000 * nsecs);
		sdcmn_err5(("timeout initiated %ld\n", nticks));
		(void) timeout(sdev_state_timeout, NULL, nticks);
		sdev_nc_flush_boot_update();
	}
}
589 588
590 589 /*
591 590 * Called to inform the filesystem of progress during boot,
592 591 * either a notice of reconfiguration boot or an indication of
593 592 * system boot complete. At system boot complete, set up a
594 593 * timer at the expiration of which no further failed lookups
595 594 * will be added to the negative cache.
596 595 *
597 596 * The dev filesystem infers from reconfig boot that implicit
598 597 * reconfig need not be invoked at all as all available devices
599 598 * will have already been named.
600 599 *
601 600 * The dev filesystem infers from "system available" that devfsadmd
602 601 * can now be run and hence implicit reconfiguration may be initiated.
603 602 * During early stages of system startup, implicit reconfig is
604 603 * not done to avoid impacting boot performance.
605 604 */
void
sdev_devstate_change(void)
{
	int new_state;

	/*
	 * Track system state and manage interesting transitions.
	 * States are ordered; only forward transitions are acted on.
	 */
	new_state = SDEV_BOOT_STATE_INITIAL;
	if (i_ddi_reconfig())
		new_state = SDEV_BOOT_STATE_RECONFIG;
	if (i_ddi_sysavail())
		new_state = SDEV_BOOT_STATE_SYSAVAIL;

	if (sdev_boot_state < new_state) {
		switch (new_state) {
		case SDEV_BOOT_STATE_RECONFIG:
			sdcmn_err5(("state change: reconfigure boot\n"));
			sdev_boot_state = new_state;
			/*
			 * The /dev filesystem fills a hot-plug .vs.
			 * public-namespace gap by invoking 'devfsadm' once
			 * as a result of the first /dev lookup failure
			 * (or getdents/readdir). Originally, it was thought
			 * that a reconfig reboot did not have a hot-plug gap,
			 * but this is not true - the gap is just smaller:
			 * it exists from the time the smf invocation of
			 * devfsadm completes its forced devinfo snapshot,
			 * to the time when the smf devfsadmd daemon invocation
			 * is set up and listening for hotplug sysevents.
			 * Since there is still a gap with reconfig reboot,
			 * we no longer set 'sdev_reconfig_boot'.
			 */
			if (!sdev_nc_disable_reset)
				sdev_nc_free_bootonly();
			break;
		case SDEV_BOOT_STATE_SYSAVAIL:
			sdcmn_err5(("system available\n"));
			sdev_boot_state = new_state;
			sdev_state_sysavail();
			break;
		}
	}
}
650 649
651 650 /*
652 651 * Lookup: filter out entries in the negative cache
653 652 * Return 1 if the lookup should not cause a reconfig.
654 653 */
int
sdev_lookup_filter(sdev_node_t *dv, char *nm)
{
	int n;
	sdev_nc_list_t *ncl = sdev_ncache;
	sdev_nc_node_t *lp;
	char *path;
	int rval = 0;
	int changed = 0;

	ASSERT(i_ddi_io_initialized());
	ASSERT(SDEVTOV(dv)->v_type == VDIR);

	if (sdev_nc_disable)
		return (0);

	/* build "<dir>/<name>": +2 covers the '/' and the NUL */
	n = strlen(dv->sdev_path) + strlen(nm) + 2;
	path = kmem_alloc(n, KM_SLEEP);
	(void) sprintf(path, "%s/%s", dv->sdev_path, nm);

	rw_enter(&ncl->ncl_lock, RW_READER);
	if ((lp = sdev_nc_findpath(ncl, path)) != NULL) {
		sdcmn_err5(("%s/%s: lookup by %s cached, no reconfig\n",
		    dv->sdev_name, nm, curproc->p_user.u_comm));
		if (sdev_nc_verbose) {
			cmn_err(CE_CONT,
			    "?%s/%s: lookup by %s cached, no reconfig\n",
			    dv->sdev_name, nm, curproc->p_user.u_comm);
		}
		/*
		 * Mark the entry referenced; restore a partially-aged
		 * expire count to the default and note the list dirty.
		 */
		mutex_enter(&ncl->ncl_mutex);
		lp->ncn_flags |= NCN_ACTIVE;
		if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0 &&
		    lp->ncn_expirecnt < sdev_nc_expirecnt) {
			lp->ncn_expirecnt = sdev_nc_expirecnt;
			ncl->ncl_flags |= NCL_LIST_DIRTY;
			changed = 1;
		}
		mutex_exit(&ncl->ncl_mutex);
		rval = 1;
	}
	rw_exit(&ncl->ncl_lock);
	kmem_free(path, n);
	/* flush outside of all cache locks */
	if (changed)
		sdev_nc_flush_boot_update();
	return (rval);
}
701 700
void
sdev_lookup_failed(sdev_node_t *dv, char *nm, int failed_flags)
{
	if (sdev_nc_disable)
		return;

	/*
	 * If we're still in the initial boot stage, always update
	 * the cache - we may not have received notice of the
	 * reconfig boot state yet. On a reconfigure boot, entries
	 * from the backing store are not re-persisted on update,
	 * but new entries are marked as needing an update.
	 * Never cache dynamic or non-global nodes.
	 */
	if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
	    !SDEV_IS_NO_NCACHE(dv) &&
	    ((failed_flags & SLF_NO_NCACHE) == 0) &&
	    ((sdev_reconfig_boot &&
	    (sdev_boot_state != SDEV_BOOT_STATE_COMPLETE)) ||
	    (!sdev_reconfig_boot && ((failed_flags & SLF_REBUILT))))) {
		sdev_nc_addname(sdev_ncache,
		    dv, nm, NCN_SRC_CURRENT|NCN_ACTIVE);
	}
}
726 725
727 726 static sdev_nc_list_t *
728 727 sdev_nc_newlist(void)
729 728 {
730 729 sdev_nc_list_t *ncl;
731 730
732 731 ncl = kmem_zalloc(sizeof (sdev_nc_list_t), KM_SLEEP);
733 732
734 733 rw_init(&ncl->ncl_lock, NULL, RW_DEFAULT, NULL);
735 734 mutex_init(&ncl->ncl_mutex, NULL, MUTEX_DEFAULT, NULL);
736 735 list_create(&ncl->ncl_list, sizeof (sdev_nc_node_t),
737 736 offsetof(sdev_nc_node_t, ncn_link));
738 737
739 738 return (ncl);
740 739 }
741 740
/* Free a node already removed from its list: name string, then node. */
static void
sdev_nc_free_unlinked_node(sdev_nc_node_t *lp)
{
	kmem_free(lp->ncn_name, strlen(lp->ncn_name) + 1);
	kmem_free(lp, sizeof (sdev_nc_node_t));
}
748 747
749 748 static sdev_nc_node_t *
750 749 sdev_nc_findpath(sdev_nc_list_t *ncl, char *path)
751 750 {
752 751 sdev_nc_node_t *lp;
753 752
754 753 ASSERT(RW_LOCK_HELD(&ncl->ncl_lock));
755 754
756 755 for (lp = list_head(&ncl->ncl_list); lp;
757 756 lp = list_next(&ncl->ncl_list, lp)) {
758 757 if (strcmp(path, lp->ncn_name) == 0)
759 758 return (lp);
760 759 }
761 760
762 761 return (NULL);
763 762 }
764 763
/*
 * Insert a freshly-allocated node into the cache, consuming it.
 * Duplicate paths merge flags into the existing node; an insert
 * past the size cap is dropped.  Either way 'new' is freed when
 * not linked in.
 */
static void
sdev_nc_insertnode(sdev_nc_list_t *ncl, sdev_nc_node_t *new)
{
	sdev_nc_node_t *lp;

	rw_enter(&ncl->ncl_lock, RW_WRITER);

	lp = sdev_nc_findpath(ncl, new->ncn_name);
	if (lp == NULL) {
		if (ncl->ncl_nentries == sdev_nc_max_entries) {
			sdcmn_err5((
			    "%s by %s: not adding to ncache (max %d)\n",
			    new->ncn_name, curproc->p_user.u_comm,
			    ncl->ncl_nentries));
			if (sdev_nc_verbose) {
				cmn_err(CE_CONT, "?%s by %s: "
				    "not adding to ncache (max %d)\n",
				    new->ncn_name, curproc->p_user.u_comm,
				    ncl->ncl_nentries);
			}
			rw_exit(&ncl->ncl_lock);
			sdev_nc_free_unlinked_node(new);
		} else {

			list_insert_tail(&ncl->ncl_list, new);
			ncl->ncl_nentries++;

			/* don't mark list dirty for nodes from store */
			mutex_enter(&ncl->ncl_mutex);
			if ((new->ncn_flags & NCN_SRC_STORE) == 0) {
				sdcmn_err5(("%s by %s: add to ncache\n",
				    new->ncn_name, curproc->p_user.u_comm));
				if (sdev_nc_verbose) {
					cmn_err(CE_CONT,
					    "?%s by %s: add to ncache\n",
					    new->ncn_name,
					    curproc->p_user.u_comm);
				}
				ncl->ncl_flags |= NCL_LIST_DIRTY;
			}
			mutex_exit(&ncl->ncl_mutex);
			rw_exit(&ncl->ncl_lock);
			lp = new;
			/* flush after all cache locks are dropped */
			sdev_nc_flush_boot_update();
		}
	} else {
		/* already cached: merge the new flags and discard 'new' */
		mutex_enter(&ncl->ncl_mutex);
		lp->ncn_flags |= new->ncn_flags;
		mutex_exit(&ncl->ncl_mutex);
		rw_exit(&ncl->ncl_lock);
		sdev_nc_free_unlinked_node(new);
	}
}
818 817
/*
 * Build a "<dir>/<name>" cache node for a failed lookup and hand it
 * to sdev_nc_insertnode (which consumes the node in all cases).
 */
void
sdev_nc_addname(sdev_nc_list_t *ncl, sdev_node_t *dv, char *nm, int flags)
{
	int n;
	sdev_nc_node_t *lp;

	ASSERT(SDEVTOV(dv)->v_type == VDIR);

	lp = kmem_zalloc(sizeof (sdev_nc_node_t), KM_SLEEP);

	/* +2 covers the '/' separator and the terminating NUL */
	n = strlen(dv->sdev_path) + strlen(nm) + 2;
	lp->ncn_name = kmem_alloc(n, KM_SLEEP);
	(void) sprintf(lp->ncn_name, "%s/%s",
	    dv->sdev_path, nm);
	lp->ncn_flags = flags;
	lp->ncn_expirecnt = sdev_nc_expirecnt;
	sdev_nc_insertnode(ncl, lp);
}
837 836
838 837 void
839 838 sdev_nc_node_exists(sdev_node_t *dv)
840 839 {
841 840 /* dynamic and non-global nodes are never cached */
842 841 if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
843 842 !SDEV_IS_NO_NCACHE(dv)) {
844 843 sdev_nc_path_exists(sdev_ncache, dv->sdev_path);
845 844 }
846 845 }
847 846
/*
 * Remove 'path' from the negative cache (it now resolves), flushing
 * the backing store if writes are enabled.
 */
void
sdev_nc_path_exists(sdev_nc_list_t *ncl, char *path)
{
	sdev_nc_node_t *lp;

	if (sdev_nc_disable)
		return;

	rw_enter(&ncl->ncl_lock, RW_READER);
	if ((lp = sdev_nc_findpath(ncl, path)) == NULL) {
		rw_exit(&ncl->ncl_lock);
		return;
	}
	/*
	 * Need the write lock to remove.  If the upgrade fails we had
	 * to drop the lock, so the entry may have been removed by
	 * someone else in the window - look it up again.
	 */
	if (rw_tryupgrade(&ncl->ncl_lock) == 0) {
		rw_exit(&ncl->ncl_lock);
		rw_enter(&ncl->ncl_lock, RW_WRITER);
		lp = sdev_nc_findpath(ncl, path);
	}
	if (lp) {
		list_remove(&ncl->ncl_list, lp);
		ncl->ncl_nentries--;
		mutex_enter(&ncl->ncl_mutex);
		ncl->ncl_flags |= NCL_LIST_DIRTY;
		if (ncl->ncl_flags & NCL_LIST_WENABLE) {
			mutex_exit(&ncl->ncl_mutex);
			rw_exit(&ncl->ncl_lock);
			/* flush with no cache locks held */
			sdev_nc_flush_updates();
		} else {
			mutex_exit(&ncl->ncl_mutex);
			rw_exit(&ncl->ncl_lock);
		}
		sdev_nc_free_unlinked_node(lp);
		sdcmn_err5(("%s by %s: removed from ncache\n",
		    path, curproc->p_user.u_comm));
		if (sdev_nc_verbose) {
			cmn_err(CE_CONT, "?%s by %s: removed from ncache\n",
			    path, curproc->p_user.u_comm);
		}
	} else
		rw_exit(&ncl->ncl_lock);
}
889 888
/*
 * On a reconfigure boot, discard every entry that did not originate
 * in the current boot (i.e. entries loaded from the backing store),
 * marking the list dirty so the store gets rewritten.
 */
static void
sdev_nc_free_bootonly(void)
{
	sdev_nc_list_t *ncl = sdev_ncache;
	sdev_nc_node_t *lp;
	sdev_nc_node_t *next;

	rw_enter(&ncl->ncl_lock, RW_WRITER);

	for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
		/* grab 'next' first: lp may be freed below */
		next = list_next(&ncl->ncl_list, lp);
		if ((lp->ncn_flags & NCN_SRC_CURRENT) == 0) {
			sdcmn_err5(("freeing %s\n", lp->ncn_name));
			mutex_enter(&ncl->ncl_mutex);
			ncl->ncl_flags |= NCL_LIST_DIRTY;
			mutex_exit(&ncl->ncl_mutex);
			list_remove(&ncl->ncl_list, lp);
			sdev_nc_free_unlinked_node(lp);
			ncl->ncl_nentries--;
		}
	}

	rw_exit(&ncl->ncl_lock);
}
↓ open down ↓ |
558 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX