1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2012, Josef 'Jeff' Sipek <jeffpc@31bits.net>. All rights reserved. 
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/mem.h>
#include <sys/mman.h>
#include <sys/vnode.h>
#include <sys/errno.h>
#include <sys/memlist.h>
#include <sys/dumphdr.h>
#include <sys/dumpadm.h>
#include <sys/ksyms.h>
#include <sys/compress.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/cmn_err.h>
#include <sys/bitmap.h>
#include <sys/modctl.h>
#include <sys/utsname.h>
#include <sys/systeminfo.h>
#include <sys/vmem.h>
#include <sys/log.h>
#include <sys/var.h>
#include <sys/debug.h>
#include <sys/sunddi.h>
#include <fs/fs_subr.h>
#include <sys/fs/snode.h>
#include <sys/ontrap.h>
#include <sys/panic.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/errorq.h>
#include <sys/fm/util.h>
#include <sys/fs/zfs.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <sys/clock_impl.h>
#include <sys/hold_page.h>

/*
 * exported vars
 */
kmutex_t	dump_lock;		/* lock for dump configuration */
dumphdr_t	*dumphdr;		/* dump header */
int		dump_conflags = DUMP_KERNEL; /* dump configuration flags */
vnode_t		*dumpvp;		/* dump device vnode pointer */
u_offset_t	dumpvp_size;		/* size of dump device, in bytes */
char		*dumppath;		/* pathname of dump device */
int		dump_timeout = 120;	/* timeout for dumping pages */
int		dump_timeleft;		/* portion of dump_timeout remaining */
int		dump_ioerr;		/* dump i/o error */
char		*dump_stack_scratch;	/* scratch area for saving stack summary */

/*
 * Tunables for dump.  These can be set via /etc/system.
 *
 * dump_metrics_on	if set, metrics are collected in the kernel, passed
 *	to savecore via the dump file, and recorded by savecore in
 *	METRICS.txt.
 */

/*
 * tunables for pre-reserved heap; together they size the kmem reserve
 * handed to kmem_dump_init() when the dump device is configured:
 * dump_kmem_permap is in bytes and dump_kmem_pages is a page count
 * (see the kmem_dump_init() call in dump_update_clevel()).
 */
uint_t dump_kmem_permap = 1024;
uint_t dump_kmem_pages = 8;

/*
 * Compression metrics are accumulated nano-second subtotals. The
 * results are normalized by the number of pages dumped. A report is
 * generated when dumpsys() completes and is saved in the dump image
 * after the trailing dump header.
 *
 * Metrics are always collected. Set the variable dump_metrics_on to
 * cause metrics to be saved in the crash file, where savecore will
 * save it in the file METRICS.txt.
 */

/*
 * List of the per-page dump phases that are timed.  Each use of
 * PERPAGES re-defines PERPAGE() to stamp out one field/statement per
 * phase (see perpage_t below and dumpsys_metrics()).
 */
#define	PERPAGES \
	PERPAGE(bitmap) PERPAGE(map) PERPAGE(unmap) \
	PERPAGE(copy) PERPAGE(compress) \
	PERPAGE(write) \
	PERPAGE(inwait) PERPAGE(outwait)

/* one hrtime_t accumulator per phase named in PERPAGES */
typedef struct perpage {
#define	PERPAGE(x) hrtime_t x;
	PERPAGES
#undef PERPAGE
} perpage_t;

/*
 * This macro controls the code generation for collecting dump
 * performance information. By default, the code is generated, but
 * automatic saving of the information is disabled. If dump_metrics_on
 * is set to 1, the timing information is passed to savecore via the
 * crash file, where it is appended to the file dump-dir/METRICS.txt.
 */
#define	COLLECT_METRICS

#ifdef COLLECT_METRICS
uint_t dump_metrics_on = 0;	/* set to 1 to enable recording metrics */

/*
 * Timing helpers: HRSTART stamps a start time into the "ts" shadow
 * struct (v##ts), HRSTOP folds the elapsed time into the accumulator.
 * They compile to nothing when COLLECT_METRICS is not defined.
 */
#define	HRSTART(v, m)		v##ts.m = gethrtime()
#define	HRSTOP(v, m)		v.m += gethrtime() - v##ts.m
#define	HRBEGIN(v, m, s)	v##ts.m = gethrtime(); v.size += s
#define	HREND(v, m)		v.m += gethrtime() - v##ts.m
#define	HRNORM(v, m, n)		v.m /= (n)

#else
#define	HRSTART(v, m)
#define	HRSTOP(v, m)
#define	HRBEGIN(v, m, s)
#define	HREND(v, m)
#define	HRNORM(v, m, n)
#endif /* COLLECT_METRICS */

/* image UUID recorded in the dump header; 36 chars + NUL */
static char dump_osimage_uuid[36 + 1];

/* local ctype helpers (kernel has no <ctype.h>) */
#define	isdigit(ch)	((ch) >= '0' && (ch) <= '9')
#define	isxdigit(ch)	(isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
	((ch) >= 'A' && (ch) <= 'F'))

/*
 * Dynamic state when dumpsys() is running.
 */
typedef struct dumpsync {
	pgcnt_t npages;			/* subtotal of pages dumped */
	pgcnt_t pages_mapped;		/* subtotal of pages mapped */
	pgcnt_t pages_used;		/* subtotal of pages used per map */
	size_t nwrite;			/* subtotal of bytes written */
	uint_t percent;			/* dump progress */
	uint_t percent_done;		/* dump progress reported */
	hrtime_t start;			/* start time */
	hrtime_t elapsed;		/* elapsed time when completed */
	hrtime_t iotime;		/* time spent writing nwrite bytes */
	hrtime_t iowait;		/* time spent waiting for output */
	hrtime_t iowaitts;		/* iowait timestamp */
	perpage_t perpage;		/* metrics */
	perpage_t perpagets;
} dumpsync_t;

static dumpsync_t dumpsync;		/* synchronization vars */

/*
 * configuration vars for dumpsys
 */
typedef struct dumpcfg {
	perpage_t perpage;		/* per page metrics */
	perpage_t perpagets;		/* per page metrics (timestamps) */
	char *page;			/* buffer for page copy */
	char *lzbuf;			/* lzjb output */

	char *cmap;			/* array of input (map) buffers */
	ulong_t *bitmap;		/* bitmap for marking pages to dump */
	pgcnt_t bitmapsize;		/* size of bitmap */
	pid_t *pids;			/* list of process IDs at dump time */
} dumpcfg_t;

static dumpcfg_t dumpcfg;		/* config vars */

/*
 * The dump I/O buffer.
 *
 * There is one I/O buffer used by dumpvp_write and dumvp_flush. It is
 * sized according to the optimum device transfer speed.
 */
typedef struct dumpbuf {
	vnode_t	*cdev_vp;		/* VCHR open of the dump device */
	len_t	vp_limit;		/* maximum write offset */
	offset_t vp_off;		/* current dump device offset */
	char	*cur;			/* dump write pointer */
	char	*start;			/* dump buffer address */
	char	*end;			/* dump buffer end */
	size_t	size;			/* size of dumpbuf in bytes */
	size_t	iosize;			/* best transfer size for device */
} dumpbuf_t;

static dumpbuf_t dumpbuf;		/* I/O buffer */

/*
 * The dump I/O buffer must be at least one page, at most xfer_size
 * bytes, and should scale with physmem in between.  The transfer size
 * passed in will either represent a global default (maxphys) or the
 * best size for the device.  The size of the dumpbuf I/O buffer is
 * limited by dumpbuf_limit (8MB by default) because the dump
 * performance saturates beyond a certain size.  The default is to
 * select 1/4096 of the memory.
220 */ 221 static int dumpbuf_fraction = 12; /* memory size scale factor */ 222 static size_t dumpbuf_limit = 8 << 20; /* max I/O buf size */ 223 224 static size_t 225 dumpbuf_iosize(size_t xfer_size) 226 { 227 size_t iosize = ptob(physmem >> dumpbuf_fraction); 228 229 if (iosize < PAGESIZE) 230 iosize = PAGESIZE; 231 else if (iosize > xfer_size) 232 iosize = xfer_size; 233 if (iosize > dumpbuf_limit) 234 iosize = dumpbuf_limit; 235 return (iosize & PAGEMASK); 236 } 237 238 /* 239 * resize the I/O buffer 240 */ 241 static void 242 dumpbuf_resize(void) 243 { 244 char *old_buf = dumpbuf.start; 245 size_t old_size = dumpbuf.size; 246 char *new_buf; 247 size_t new_size; 248 249 ASSERT(MUTEX_HELD(&dump_lock)); 250 251 new_size = dumpbuf_iosize(MAX(dumpbuf.iosize, maxphys)); 252 if (new_size <= old_size) 253 return; /* no need to reallocate buffer */ 254 255 new_buf = kmem_alloc(new_size, KM_SLEEP); 256 dumpbuf.size = new_size; 257 dumpbuf.start = new_buf; 258 dumpbuf.end = new_buf + new_size; 259 kmem_free(old_buf, old_size); 260 } 261 262 /* 263 * dump_update_clevel is called when dumpadm configures the dump device. 264 * Allocate the minimum configuration for now. 265 * 266 * When the dump file is configured we reserve a minimum amount of 267 * memory for use at crash time. But we reserve VA for all the memory 268 * we really want in order to do the fastest dump possible. The VA is 269 * backed by pages not being dumped, according to the bitmap. If 270 * there is insufficient spare memory, however, we fall back to the 271 * minimum. 272 * 273 * Live dump (savecore -L) always uses the minimum config. 274 * 275 * For single-threaded dumps, the panic CPU does lzjb compression. 276 * 277 */ 278 static void 279 dump_update_clevel() 280 { 281 dumpcfg_t *old = &dumpcfg; 282 dumpcfg_t newcfg = *old; 283 dumpcfg_t *new = &newcfg; 284 285 ASSERT(MUTEX_HELD(&dump_lock)); 286 287 /* 288 * Free the previously allocated bufs and VM. 
289 */ 290 if (old->lzbuf) 291 kmem_free(old->lzbuf, PAGESIZE); 292 if (old->page) 293 kmem_free(old->page, PAGESIZE); 294 295 if (old->cmap) 296 /* VM space for mapping pages */ 297 vmem_xfree(heap_arena, old->cmap, PAGESIZE); 298 299 /* 300 * Allocate new data structures and buffers, and also figure the max 301 * desired size. 302 */ 303 new->lzbuf = kmem_alloc(PAGESIZE, KM_SLEEP); 304 new->page = kmem_alloc(PAGESIZE, KM_SLEEP); 305 306 new->cmap = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE, 307 0, 0, NULL, NULL, VM_SLEEP); 308 309 /* 310 * Reserve memory for kmem allocation calls made during crash 311 * dump. The hat layer allocates memory for each mapping 312 * created, and the I/O path allocates buffers and data structs. 313 * Add a few pages for safety. 314 */ 315 kmem_dump_init(dump_kmem_permap + (dump_kmem_pages * PAGESIZE)); 316 317 /* set new config pointers */ 318 *old = *new; 319 } 320 321 /* 322 * Define a struct memlist walker to optimize bitnum to pfn 323 * lookup. The walker maintains the state of the list traversal. 324 */ 325 typedef struct dumpmlw { 326 struct memlist *mp; /* current memlist */ 327 pgcnt_t basenum; /* bitnum base offset */ 328 pgcnt_t mppages; /* current memlist size */ 329 pgcnt_t mpleft; /* size to end of current memlist */ 330 pfn_t mpaddr; /* first pfn in memlist */ 331 } dumpmlw_t; 332 333 /* initialize the walker */ 334 static inline void 335 dump_init_memlist_walker(dumpmlw_t *pw) 336 { 337 pw->mp = phys_install; 338 pw->basenum = 0; 339 pw->mppages = pw->mp->ml_size >> PAGESHIFT; 340 pw->mpleft = pw->mppages; 341 pw->mpaddr = pw->mp->ml_address >> PAGESHIFT; 342 } 343 344 /* 345 * Lookup pfn given bitnum. The memlist can be quite long on some 346 * systems (e.g.: one per board). To optimize sequential lookups, the 347 * caller initializes and presents a memlist walker. 
 */
static pfn_t
dump_bitnum_to_pfn(pgcnt_t bitnum, dumpmlw_t *pw)
{
	bitnum -= pw->basenum;
	while (pw->mp != NULL) {
		if (bitnum < pw->mppages) {
			pw->mpleft = pw->mppages - bitnum;
			return (pw->mpaddr + bitnum);
		}
		/* advance the walker to the next memlist segment */
		bitnum -= pw->mppages;
		pw->basenum += pw->mppages;
		pw->mp = pw->mp->ml_next;
		if (pw->mp != NULL) {
			pw->mppages = pw->mp->ml_size >> PAGESHIFT;
			pw->mpleft = pw->mppages;
			pw->mpaddr = pw->mp->ml_address >> PAGESHIFT;
		}
	}
	return (PFN_INVALID);
}

/*
 * Inverse of dump_bitnum_to_pfn(): linear scan of phys_install;
 * returns (pgcnt_t)-1 if the pfn is not in any installed segment.
 */
static pgcnt_t
dump_pfn_to_bitnum(pfn_t pfn)
{
	struct memlist *mp;
	pgcnt_t bitnum = 0;

	for (mp = phys_install; mp != NULL; mp = mp->ml_next) {
		if (pfn >= (mp->ml_address >> PAGESHIFT) &&
		    pfn < ((mp->ml_address + mp->ml_size) >> PAGESHIFT))
			return (bitnum + pfn - (mp->ml_address >> PAGESHIFT));
		bitnum += mp->ml_size >> PAGESHIFT;
	}
	return ((pgcnt_t)-1);
}

/*
 * One-time (and on memory-size change) initialization of the dump
 * header, I/O buffer, pid list and page bitmap.  Caller must hold
 * dump_lock.
 */
static void
dumphdr_init(void)
{
	pgcnt_t npages;

	ASSERT(MUTEX_HELD(&dump_lock));

	if (dumphdr == NULL) {
		dumphdr = kmem_zalloc(sizeof (dumphdr_t), KM_SLEEP);
		dumphdr->dump_magic = DUMP_MAGIC;
		dumphdr->dump_version = DUMP_VERSION;
		dumphdr->dump_wordsize = DUMP_WORDSIZE;
		dumphdr->dump_pageshift = PAGESHIFT;
		dumphdr->dump_pagesize = PAGESIZE;
		dumphdr->dump_utsname = utsname;
		(void) strcpy(dumphdr->dump_platform, platform);
		dumpbuf.size = dumpbuf_iosize(maxphys);
		dumpbuf.start = kmem_alloc(dumpbuf.size, KM_SLEEP);
		dumpbuf.end = dumpbuf.start + dumpbuf.size;
		/* room for one pid per process table slot */
		dumpcfg.pids = kmem_alloc(v.v_proc * sizeof (pid_t), KM_SLEEP);
		dump_stack_scratch = kmem_alloc(STACK_BUF_SIZE, KM_SLEEP);
		(void) strncpy(dumphdr->dump_uuid, dump_get_uuid(),
		    sizeof (dumphdr->dump_uuid));
	}

	npages = num_phys_pages();

	/* (re)allocate the bitmap if the installed page count changed */
	if (dumpcfg.bitmapsize != npages) {
		void *map = kmem_alloc(BT_SIZEOFMAP(npages), KM_SLEEP);

		if (dumpcfg.bitmap != NULL)
			kmem_free(dumpcfg.bitmap,
			    BT_SIZEOFMAP(dumpcfg.bitmapsize));
		dumpcfg.bitmap = map;
		dumpcfg.bitmapsize = npages;
	}
}

/*
 * Establish a new dump device.
 */
int
dumpinit(vnode_t *vp, char *name, int justchecking)
{
	vnode_t *cvp;
	vattr_t vattr;
	vnode_t *cdev_vp;
	int error = 0;

	ASSERT(MUTEX_HELD(&dump_lock));

	dumphdr_init();

	cvp = common_specvp(vp);
	if (cvp == dumpvp)
		return (0);

	/*
	 * Determine whether this is a plausible dump device.  We want either:
	 * (1) a real device that's not mounted and has a cb_dump routine, or
	 * (2) a swapfile on some filesystem that has a vop_dump routine.
	 */
	if ((error = VOP_OPEN(&cvp, FREAD | FWRITE, kcred, NULL)) != 0)
		return (error);

	vattr.va_mask = AT_SIZE | AT_TYPE | AT_RDEV;
	if ((error = VOP_GETATTR(cvp, &vattr, 0, kcred, NULL)) == 0) {
		if (vattr.va_type == VBLK || vattr.va_type == VCHR) {
			if (devopsp[getmajor(vattr.va_rdev)]->
			    devo_cb_ops->cb_dump == nodev)
				error = ENOTSUP;
			else if (vfs_devismounted(vattr.va_rdev))
				error = EBUSY;
			/* a zvol already in use as swap cannot be the dump device */
			if (strcmp(ddi_driver_name(VTOS(cvp)->s_dip),
			    ZFS_DRIVER) == 0 &&
			    IS_SWAPVP(common_specvp(cvp)))
				error = EBUSY;
		} else {
			if (vn_matchopval(cvp, VOPNAME_DUMP, fs_nosys) ||
			    !IS_SWAPVP(cvp))
				error = ENOTSUP;
		}
	}

	/* must have room for the log, ereport and summary save areas */
	if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE)
		error = ENOSPC;

	if (error || justchecking) {
		(void) VOP_CLOSE(cvp, FREAD | FWRITE, 1, (offset_t)0,
		    kcred, NULL);
		return (error);
	}

	VN_HOLD(cvp);

	if (dumpvp != NULL)
		dumpfini();	/* unconfigure the old dump device */

	dumpvp = cvp;
	dumpvp_size = vattr.va_size & -DUMP_OFFSET;
	dumppath = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(dumppath, name);
	dumpbuf.iosize = 0;

	/*
	 * If the dump device is a block device, attempt to open up the
	 * corresponding character device and determine its maximum transfer
	 * size.  We use this information to potentially resize dumpbuf to a
	 * larger and more optimal size for performing i/o to the dump device.
	 */
	if (cvp->v_type == VBLK &&
	    (cdev_vp = makespecvp(VTOS(cvp)->s_dev, VCHR)) != NULL) {
		if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) {
			size_t blk_size;
			struct dk_cinfo dki;
			struct dk_minfo minf;

			if (VOP_IOCTL(cdev_vp, DKIOCGMEDIAINFO,
			    (intptr_t)&minf, FKIOCTL, kcred, NULL, NULL)
			    == 0 && minf.dki_lbsize != 0)
				blk_size = minf.dki_lbsize;
			else
				blk_size = DEV_BSIZE;

			if (VOP_IOCTL(cdev_vp, DKIOCINFO, (intptr_t)&dki,
			    FKIOCTL, kcred, NULL, NULL) == 0) {
				dumpbuf.iosize = dki.dki_maxtransfer * blk_size;
				dumpbuf_resize();
			}
			/*
			 * If we are working with a zvol then dumpify it
			 * if it's not being used as swap.
			 *
			 * NOTE(review): dki is read here even when the
			 * DKIOCINFO ioctl above failed, in which case
			 * dki_dname is uninitialized — confirm whether the
			 * ioctl can fail for a zvol, or guard this block on
			 * the ioctl's success.
			 */
			if (strcmp(dki.dki_dname, ZVOL_DRIVER) == 0) {
				if (IS_SWAPVP(common_specvp(cvp)))
					error = EBUSY;
				else if ((error = VOP_IOCTL(cdev_vp,
				    DKIOCDUMPINIT, NULL, FKIOCTL, kcred,
				    NULL, NULL)) != 0)
					dumpfini();
			}

			(void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0,
			    kcred, NULL);
		}

		VN_RELE(cdev_vp);
	}

	cmn_err(CE_CONT, "?dump on %s size %llu MB\n", name, dumpvp_size >> 20);

	dump_update_clevel();

	return (error);
}

/*
 * Unconfigure the current dump device: give zvols a chance to clean
 * up (DKIOCDUMPFINI), then close and release the vnode.  Caller must
 * hold dump_lock.
 */
void
dumpfini(void)
{
	vattr_t vattr;
	boolean_t is_zfs = B_FALSE;
	vnode_t *cdev_vp;
	ASSERT(MUTEX_HELD(&dump_lock));

	kmem_free(dumppath, strlen(dumppath) + 1);

	/*
	 * Determine if we are using zvols for our dump device
	 */
	vattr.va_mask = AT_RDEV;
	if (VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL) == 0) {
		is_zfs = (getmajor(vattr.va_rdev) ==
		    ddi_name_to_major(ZFS_DRIVER)) ? B_TRUE : B_FALSE;
	}

	/*
	 * If we have a zvol dump device then we call into zfs so
	 * that it may have a chance to cleanup.
	 */
	if (is_zfs &&
	    (cdev_vp = makespecvp(VTOS(dumpvp)->s_dev, VCHR)) != NULL) {
		if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) {
			(void) VOP_IOCTL(cdev_vp, DKIOCDUMPFINI, NULL, FKIOCTL,
			    kcred, NULL, NULL);
			(void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0,
			    kcred, NULL);
		}
		VN_RELE(cdev_vp);
	}

	(void) VOP_CLOSE(dumpvp, FREAD | FWRITE, 1, (offset_t)0, kcred, NULL);

	VN_RELE(dumpvp);

	dumpvp = NULL;
	dumpvp_size = 0;
	dumppath = NULL;
}

/*
 * Flush the I/O buffer to the dump device, rounding the write up to a
 * page multiple.  Uses VOP_DUMP when panicking, vn_rdwr otherwise.
 * Records I/O time and wait time in dumpsync, sets dump_ioerr on the
 * first failure, and returns the new device offset.
 */
static offset_t
dumpvp_flush(void)
{
	size_t size = P2ROUNDUP(dumpbuf.cur - dumpbuf.start, PAGESIZE);
	hrtime_t iotime;
	int err;

	if (dumpbuf.vp_off + size > dumpbuf.vp_limit) {
		dump_ioerr = ENOSPC;
		dumpbuf.vp_off = dumpbuf.vp_limit;
	} else if (size != 0) {
		iotime = gethrtime();
		dumpsync.iowait += iotime - dumpsync.iowaitts;
		if (panicstr)
			err = VOP_DUMP(dumpvp, dumpbuf.start,
			    lbtodb(dumpbuf.vp_off), btod(size), NULL);
		else
			err = vn_rdwr(UIO_WRITE, dumpbuf.cdev_vp != NULL ?
			    dumpbuf.cdev_vp : dumpvp, dumpbuf.start, size,
			    dumpbuf.vp_off, UIO_SYSSPACE, 0, dumpbuf.vp_limit,
			    kcred, 0);
		/* remember only the first error; later writes may also fail */
		if (err && dump_ioerr == 0)
			dump_ioerr = err;
		dumpsync.iowaitts = gethrtime();
		dumpsync.iotime += dumpsync.iowaitts - iotime;
		dumpsync.nwrite += size;
		dumpbuf.vp_off += size;
	}
	dumpbuf.cur = dumpbuf.start;
	dump_timeleft = dump_timeout;
	return (dumpbuf.vp_off);
}

/* maximize write speed by keeping seek offset aligned with size */
void
dumpvp_write(const void *va, size_t size)
{
	size_t len, off, sz;

	while (size != 0) {
		len = MIN(size, dumpbuf.end - dumpbuf.cur);
		if (len == 0) {
			/* buffer full: flush, keeping vp_off size-aligned */
			off = P2PHASE(dumpbuf.vp_off, dumpbuf.size);
			if (off == 0 || !ISP2(dumpbuf.size)) {
				(void) dumpvp_flush();
			} else {
				/*
				 * Write only up to the next aligned
				 * boundary; slide the unaligned tail back
				 * to the front of the buffer.
				 */
				sz = dumpbuf.size - off;
				dumpbuf.cur = dumpbuf.start + sz;
				(void) dumpvp_flush();
				ovbcopy(dumpbuf.start + sz, dumpbuf.start, off);
				dumpbuf.cur += off;
			}
		} else {
			bcopy(va, dumpbuf.cur, len);
			va = (char *)va + len;
			dumpbuf.cur += len;
			size -= len;
		}
	}
}

/* ksyms_snapshot() callback: stream symbol-table bytes to the dump */
/*ARGSUSED*/
static void
dumpvp_ksyms_write(const void *src, void *dst, size_t size)
{
	dumpvp_write(src, size);
}

/*
 * Mark 'pfn' in the bitmap and dump its translation table entry.
 */
void
dump_addpage(struct as *as, void *va, pfn_t pfn)
{
	mem_vtop_t mem_vtop;
	pgcnt_t bitnum;

	if ((bitnum = dump_pfn_to_bitnum(pfn)) != (pgcnt_t)-1) {
		/* count each physical page only once */
		if (!BT_TEST(dumpcfg.bitmap, bitnum)) {
			dumphdr->dump_npages++;
			BT_SET(dumpcfg.bitmap, bitnum);
		}
		dumphdr->dump_nvtop++;
		mem_vtop.m_as = as;
		mem_vtop.m_va = va;
		mem_vtop.m_pfn = pfn;
		dumpvp_write(&mem_vtop, sizeof (mem_vtop_t));
	}
	dump_timeleft = dump_timeout;
}

/*
 * Mark 'pfn' in the bitmap
 */
void
dump_page(pfn_t pfn)
{
	pgcnt_t bitnum;

	if ((bitnum = dump_pfn_to_bitnum(pfn)) != (pgcnt_t)-1) {
		if (!BT_TEST(dumpcfg.bitmap, bitnum)) {
			dumphdr->dump_npages++;
			BT_SET(dumpcfg.bitmap, bitnum);
		}
	}
	dump_timeleft = dump_timeout;
}

/*
 * Dump the <as, va, pfn> information for a given address space.
 * SEGOP_DUMP() will call dump_addpage() for each page in the segment.
 */
static void
dump_as(struct as *as)
{
	struct seg *seg;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
		/* a segment pointing at a different as is corrupt; stop */
		if (seg->s_as != as)
			break;
		if (seg->s_ops == NULL)
			continue;
		SEGOP_DUMP(seg);
	}
	AS_LOCK_EXIT(as, &as->a_lock);

	if (seg != NULL)
		cmn_err(CE_WARN, "invalid segment %p in address space %p",
		    (void *)seg, (void *)as);
}

/*
 * Dump the address space of process 'pid'.  Returns -1 if the process
 * cannot be locked, 0 otherwise.  System processes sharing kas are
 * skipped (kas is dumped separately).
 */
static int
dump_process(pid_t pid)
{
	proc_t *p = sprlock(pid);

	if (p == NULL)
		return (-1);
	if (p->p_as != &kas) {
		/* drop p_lock across dump_as; sprlock keeps p held */
		mutex_exit(&p->p_lock);
		dump_as(p->p_as);
		mutex_enter(&p->p_lock);
	}

	sprunlock(p);

	return (0);
}

/*
 * The following functions (dump_summary(), dump_ereports(), and
 * dump_messages()), write data to an uncompressed area within the
 * crashdump.
 * The layout of these is
 *
 * +------------------------------------------------------------+
 * |          compressed pages       | summary | ereports | messages |
 * +------------------------------------------------------------+
 *
 * With the advent of saving a compressed crash dump by default, we
 * need to save a little more data to describe the failure mode in
 * an uncompressed buffer available before savecore uncompresses
 * the dump. Initially this is a copy of the stack trace. Additional
 * summary information should be added here.
 */

/*
 * Write the summary area (currently the panic stack trace in
 * dump_stack_scratch) into its fixed slot near the end of the device,
 * terminated by a summary_dump_t with sd_magic == 0.
 */
void
dump_summary(void)
{
	u_offset_t dumpvp_start;
	summary_dump_t sd;

	if (dumpvp == NULL || dumphdr == NULL)
		return;

	dumpbuf.cur = dumpbuf.start;

	/* summary slot sits just below the ereport and log areas */
	dumpbuf.vp_limit = dumpvp_size - (DUMP_OFFSET + DUMP_LOGSIZE +
	    DUMP_ERPTSIZE);
	dumpvp_start = dumpbuf.vp_limit - DUMP_SUMMARYSIZE;
	dumpbuf.vp_off = dumpvp_start;

	sd.sd_magic = SUMMARY_MAGIC;
	sd.sd_ssum = checksum32(dump_stack_scratch, STACK_BUF_SIZE);
	dumpvp_write(&sd, sizeof (sd));
	dumpvp_write(dump_stack_scratch, STACK_BUF_SIZE);

	sd.sd_magic = 0; /* indicate end of summary */
	dumpvp_write(&sd, sizeof (sd));
	(void) dumpvp_flush();
}

/*
 * Write pending FMA ereports into their fixed slot, terminated by a
 * zeroed erpt_dump_t.  On a live dump, invalidate the cached pages so
 * savecore reads fresh data from the device.
 */
void
dump_ereports(void)
{
	u_offset_t dumpvp_start;
	erpt_dump_t ed;

	if (dumpvp == NULL || dumphdr == NULL)
		return;

	dumpbuf.cur = dumpbuf.start;
	dumpbuf.vp_limit = dumpvp_size - (DUMP_OFFSET + DUMP_LOGSIZE);
	dumpvp_start = dumpbuf.vp_limit - DUMP_ERPTSIZE;
	dumpbuf.vp_off = dumpvp_start;

	fm_ereport_dump();
	if (panicstr)
		errorq_dump();

	bzero(&ed, sizeof (ed)); /* indicate end of ereports */
	dumpvp_write(&ed, sizeof (ed));
	(void) dumpvp_flush();

	if (!panicstr) {
		(void) VOP_PUTPAGE(dumpvp, dumpvp_start,
		    (size_t)(dumpbuf.vp_off - dumpvp_start),
		    B_INVAL | B_FORCE, kcred, NULL);
	}
}

/*
 * Write the kernel message buffer (console log stream) into its fixed
 * slot, oldest messages first, terminated by a log_dump_t with
 * ld_magic == 0.
 */
void
dump_messages(void)
{
	log_dump_t ld;
	mblk_t *mctl, *mdata;
	queue_t *q, *qlast;
	u_offset_t dumpvp_start;

	if (dumpvp == NULL || dumphdr == NULL || log_consq == NULL)
		return;

	dumpbuf.cur = dumpbuf.start;
	dumpbuf.vp_limit = dumpvp_size - DUMP_OFFSET;
	dumpvp_start = dumpbuf.vp_limit - DUMP_LOGSIZE;
	dumpbuf.vp_off = dumpvp_start;

	/*
	 * Walk the queue list from the tail toward log_consq so the
	 * messages come out in chronological order.
	 */
	qlast = NULL;
	do {
		for (q = log_consq; q->q_next != qlast; q = q->q_next)
			continue;
		for (mctl = q->q_first; mctl != NULL; mctl = mctl->b_next) {
			dump_timeleft = dump_timeout;
			mdata = mctl->b_cont;
			ld.ld_magic = LOG_MAGIC;
			ld.ld_msgsize = MBLKL(mctl->b_cont);
			ld.ld_csum = checksum32(mctl->b_rptr, MBLKL(mctl));
			ld.ld_msum = checksum32(mdata->b_rptr, MBLKL(mdata));
			dumpvp_write(&ld, sizeof (ld));
			dumpvp_write(mctl->b_rptr, MBLKL(mctl));
			dumpvp_write(mdata->b_rptr, MBLKL(mdata));
		}
	} while ((qlast = q) != log_consq);

	ld.ld_magic = 0; /* indicate end of messages */
	dumpvp_write(&ld, sizeof (ld));
	(void) dumpvp_flush();
	if (!panicstr) {
		(void) VOP_PUTPAGE(dumpvp, dumpvp_start,
		    (size_t)(dumpbuf.vp_off - dumpvp_start),
		    B_INVAL | B_FORCE, kcred, NULL);
	}
}

/*
 * Copy pages, trapping ECC errors. Also, for robustness, trap data
 * access in case something goes wrong in the hat layer and the
 * mapping is broken.
 */
static int
dump_pagecopy(void *src, void *dst)
{
	long *wsrc = (long *)src;
	long *wdst = (long *)dst;
	const ulong_t ncopies = PAGESIZE / sizeof (long);
	/* volatile: w and ueoff must survive the on_trap() longjmp */
	volatile int w = 0;
	volatile int ueoff = -1;
	on_trap_data_t otd;

	/*
	 * on_trap() returns nonzero when a trapped word is hit; record
	 * the offset of the first bad word, substitute a marker value,
	 * and resume the copy loop at the next word.
	 */
	if (on_trap(&otd, OT_DATA_EC | OT_DATA_ACCESS)) {
		if (ueoff == -1)
			ueoff = w * sizeof (long);
		/* report "bad ECC" or "bad address" */
#ifdef _LP64
		if (otd.ot_trap & OT_DATA_EC)
			wdst[w++] = 0x00badecc00badecc;
		else
			wdst[w++] = 0x00badadd00badadd;
#else
		if (otd.ot_trap & OT_DATA_EC)
			wdst[w++] = 0x00badecc;
		else
			wdst[w++] = 0x00badadd;
#endif
	}
	while (w < ncopies) {
		wdst[w] = wsrc[w];
		w++;
	}
	no_trap();
	/* offset of the first uncorrectable error, or -1 if none */
	return (ueoff);
}

#ifdef COLLECT_METRICS
/*
 * Format the dump performance report (CSV-ish lines) into buf.
 * Returns the number of bytes used; the remainder of buf is zeroed.
 */
size_t
dumpsys_metrics(dumpsync_t *ds, char *buf, size_t size)
{
	dumpcfg_t *cfg = &dumpcfg;
	int myid = CPU->cpu_seqid;	/* NOTE(review): myid and i unused here */
	int i, compress_ratio;
	int sec, iorate;
	char *e = buf + size;
	char *p = buf;

	sec = ds->elapsed / (1000 * 1000 * 1000ULL);
	if (sec < 1)
		sec = 1;

	if (ds->iotime < 1)
		ds->iotime = 1;
	iorate = (ds->nwrite * 100000ULL) / ds->iotime;

	compress_ratio = 100LL * ds->npages / btopr(ds->nwrite + 1);

	/* bounded append; silently stops formatting once buf is full */
#define	P(...) (p += p < e ? snprintf(p, e - p, __VA_ARGS__) : 0)

	P("Master cpu_seqid,%d\n", CPU->cpu_seqid);
	P("Master cpu_id,%d\n", CPU->cpu_id);
	P("dump_flags,0x%x\n", dumphdr->dump_flags);
	P("dump_ioerr,%d\n", dump_ioerr);

	P("Compression type,serial lzjb\n");
	P("Compression ratio,%d.%02d\n", compress_ratio / 100, compress_ratio %
	    100);

	P("Dump I/O rate MBS,%d.%02d\n", iorate / 100, iorate % 100);
	P("..total bytes,%lld\n", (u_longlong_t)ds->nwrite);
	P("..total nsec,%lld\n", (u_longlong_t)ds->iotime);
	P("dumpbuf.iosize,%ld\n", dumpbuf.iosize);
	P("dumpbuf.size,%ld\n", dumpbuf.size);

	P("Dump pages/sec,%llu\n", (u_longlong_t)ds->npages / sec);
	P("Dump pages,%llu\n", (u_longlong_t)ds->npages);
	P("Dump time,%d\n", sec);

	if (ds->pages_mapped > 0)
		P("per-cent map utilization,%d\n", (int)((100 * ds->pages_used)
		    / ds->pages_mapped));

	P("\nPer-page metrics:\n");
	if (ds->npages > 0) {
		/* fold the per-config subtotals into ds, then print each */
#define	PERPAGE(x) ds->perpage.x += cfg->perpage.x;
		PERPAGES;
#undef PERPAGE
#define	PERPAGE(x) \
		P("%s nsec/page,%d\n", #x, (int)(ds->perpage.x / ds->npages));
		PERPAGES;
#undef PERPAGE

		P("I/O wait nsec/page,%llu\n", (u_longlong_t)(ds->iowait /
		    ds->npages));
	}
#undef P
	if (p < e)
		bzero(p, e - p);
	return (p - buf);
}
#endif /* COLLECT_METRICS */

/*
 * Dump the system.
 */
void
dumpsys(void)
{
	dumpsync_t *ds = &dumpsync;
	pfn_t pfn;
	pgcnt_t bitnum;
	proc_t *p;
	pid_t npids, pidx;
	char *content;
	char *buf;
	size_t size;
	dumpmlw_t mlw;
	dumpcsize_t datatag;
	dumpdatahdr_t datahdr;

	if (dumpvp == NULL || dumphdr == NULL) {
		uprintf("skipping system dump - no dump device configured\n");
		return;
	}
	dumpbuf.cur = dumpbuf.start;

	/* clear the sync variables */
	bzero(ds, sizeof (*ds));

	/*
	 * Calculate the starting block for dump.  If we're dumping on a
	 * swap device, start 1/5 of the way in; otherwise, start at the
	 * beginning.  And never use the first page -- it may be a disk label.
	 */
	if (dumpvp->v_flag & VISSWAP)
		dumphdr->dump_start = P2ROUNDUP(dumpvp_size / 5, DUMP_OFFSET);
	else
		dumphdr->dump_start = DUMP_OFFSET;

	dumphdr->dump_flags = DF_VALID | DF_COMPLETE | DF_LIVE | DF_COMPRESSED;
	dumphdr->dump_crashtime = gethrestime_sec();
	dumphdr->dump_npages = 0;
	dumphdr->dump_nvtop = 0;
	bzero(dumpcfg.bitmap, BT_SIZEOFMAP(dumpcfg.bitmapsize));
	dump_timeleft = dump_timeout;

	if (panicstr) {
		/* panic dump: not live; release/claim dump space and record msg */
		dumphdr->dump_flags &= ~DF_LIVE;
		(void) VOP_DUMPCTL(dumpvp, DUMP_FREE, NULL, NULL);
		(void) VOP_DUMPCTL(dumpvp, DUMP_ALLOC, NULL, NULL);
		(void) vsnprintf(dumphdr->dump_panicstring, DUMP_PANICSIZE,
		    panicstr, panicargs);

	}

	if (dump_conflags & DUMP_ALL)
		content = "all";
	else if (dump_conflags & DUMP_CURPROC)
		content = "kernel + curproc";
	else
		content = "kernel";
	uprintf("dumping to %s, offset %lld, content: %s\n", dumppath,
	    dumphdr->dump_start, content);

	/* Make sure nodename is current */
	bcopy(utsname.nodename, dumphdr->dump_utsname.nodename, SYS_NMLN);

	/*
	 * If this is a live dump, try to open a VCHR vnode for better
	 * performance. We must take care to flush the buffer cache
	 * first.
	 */
	if (!panicstr) {
		vnode_t *cdev_vp, *cmn_cdev_vp;

		ASSERT(dumpbuf.cdev_vp == NULL);
		cdev_vp = makespecvp(VTOS(dumpvp)->s_dev, VCHR);
		if (cdev_vp != NULL) {
			cmn_cdev_vp = common_specvp(cdev_vp);
			if (VOP_OPEN(&cmn_cdev_vp, FREAD | FWRITE, kcred, NULL)
			    == 0) {
				if (vn_has_cached_data(dumpvp))
					(void) pvn_vplist_dirty(dumpvp, 0, NULL,
					    B_INVAL | B_TRUNC, kcred);
				dumpbuf.cdev_vp = cmn_cdev_vp;
			} else {
				VN_RELE(cdev_vp);
			}
		}
	}

	/*
	 * Store a hires timestamp so we can look it up during debugging.
	 */
	lbolt_debug_entry();

	/*
	 * Leave room for the message and ereport save areas and terminal dump
	 * header.
	 */
	dumpbuf.vp_limit = dumpvp_size - DUMP_LOGSIZE - DUMP_OFFSET -
	    DUMP_ERPTSIZE;

	/*
	 * Write out the symbol table.  It's no longer compressed,
	 * so its 'size' and 'csize' are equal.
	 */
	dumpbuf.vp_off = dumphdr->dump_ksyms = dumphdr->dump_start + PAGESIZE;
	dumphdr->dump_ksyms_size = dumphdr->dump_ksyms_csize =
	    ksyms_snapshot(dumpvp_ksyms_write, NULL, LONG_MAX);

	/*
	 * Write out the translation map.
	 */
	dumphdr->dump_map = dumpvp_flush();
	dump_as(&kas);
	dumphdr->dump_nvtop += dump_plat_addr();

	/*
	 * call into hat, which may have unmapped pages that also need to
	 * be in the dump
	 */
	hat_dump();

	if (dump_conflags & DUMP_ALL) {
		/* snapshot all pids, then dump every address space */
		mutex_enter(&pidlock);

		for (npids = 0, p = practive; p != NULL; p = p->p_next)
			dumpcfg.pids[npids++] = p->p_pid;

		mutex_exit(&pidlock);

		for (pidx = 0; pidx < npids; pidx++)
			(void) dump_process(dumpcfg.pids[pidx]);

		dump_init_memlist_walker(&mlw);
		for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
			dump_timeleft = dump_timeout;
			pfn = dump_bitnum_to_pfn(bitnum, &mlw);
			/*
			 * Some hypervisors do not have all pages available to
			 * be accessed by the guest OS. Check for page
			 * accessibility.
			 */
			if (plat_hold_page(pfn, PLAT_HOLD_NO_LOCK, NULL) !=
			    PLAT_HOLD_OK)
				continue;
			BT_SET(dumpcfg.bitmap, bitnum);
		}
		dumphdr->dump_npages = dumpcfg.bitmapsize;
		dumphdr->dump_flags |= DF_ALL;

	} else if (dump_conflags & DUMP_CURPROC) {
		/*
		 * Determine which pid is to be dumped. If we're panicking, we
		 * dump the process associated with panic_thread (if any). If
		 * this is a live dump, we dump the process associated with
		 * curthread.
		 */
		npids = 0;
		if (panicstr) {
			if (panic_thread != NULL &&
			    panic_thread->t_procp != NULL &&
			    panic_thread->t_procp != &p0) {
				dumpcfg.pids[npids++] =
				    panic_thread->t_procp->p_pid;
			}
		} else {
			dumpcfg.pids[npids++] = curthread->t_procp->p_pid;
		}

		if (npids && dump_process(dumpcfg.pids[0]) == 0)
			dumphdr->dump_flags |= DF_CURPROC;
		else
			dumphdr->dump_flags |= DF_KERNEL;

	} else {
		dumphdr->dump_flags |= DF_KERNEL;
	}

	dumphdr->dump_hashmask = (1 << highbit(dumphdr->dump_nvtop - 1)) - 1;

	/*
	 * Write out the pfn table.
	 */
	dumphdr->dump_pfn = dumpvp_flush();
	dump_init_memlist_walker(&mlw);
	for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
		dump_timeleft = dump_timeout;
		if (!BT_TEST(dumpcfg.bitmap, bitnum))
			continue;
		pfn = dump_bitnum_to_pfn(bitnum, &mlw);
		ASSERT(pfn != PFN_INVALID);
		dumpvp_write(&pfn, sizeof (pfn_t));
	}
	dump_plat_pfn();

	/*
	 * Write out all the pages.
	 * Map pages, copy them handling UEs, compress, and write them out.
	 */
	dumphdr->dump_data = dumpvp_flush();

	ASSERT(dumpcfg.page);
	bzero(&dumpcfg.perpage, sizeof (dumpcfg.perpage));

	ds->start = gethrtime();
	ds->iowaitts = ds->start;

	/* switch kmem to the pre-reserved panic-time heap */
	if (panicstr)
		kmem_dump_begin();

	dump_init_memlist_walker(&mlw);
	for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
		size_t csize;

		dump_timeleft = dump_timeout;
		HRSTART(ds->perpage, bitmap);
		if (!BT_TEST(dumpcfg.bitmap, bitnum)) {
			HRSTOP(ds->perpage, bitmap);
			continue;
		}
		HRSTOP(ds->perpage, bitmap);

		pfn = dump_bitnum_to_pfn(bitnum, &mlw);
		ASSERT(pfn != PFN_INVALID);

		/* map the page read-only at the reserved VA, copy, unmap */
		HRSTART(ds->perpage, map);
		hat_devload(kas.a_hat, dumpcfg.cmap, PAGESIZE, pfn, PROT_READ,
		    HAT_LOAD_NOCONSIST);
		HRSTOP(ds->perpage, map);

		dump_pagecopy(dumpcfg.cmap, dumpcfg.page);

		HRSTART(ds->perpage, unmap);
		hat_unload(kas.a_hat, dumpcfg.cmap, PAGESIZE, HAT_UNLOAD);
		HRSTOP(ds->perpage, unmap);

		HRSTART(dumpcfg.perpage, compress);
		csize = compress(dumpcfg.page, dumpcfg.lzbuf, PAGESIZE);
		HRSTOP(dumpcfg.perpage, compress);

		/* each page record is its compressed size followed by data */
		HRSTART(dumpcfg.perpage, write);
		dumpvp_write(&csize, sizeof (csize));
		dumpvp_write(dumpcfg.lzbuf, csize);
		HRSTOP(dumpcfg.perpage, write);

		if (dump_ioerr) {
			dumphdr->dump_flags &= ~DF_COMPLETE;
			dumphdr->dump_npages = ds->npages;
			break;
		}
		if (++ds->npages * 100LL / dumphdr->dump_npages >
		    ds->percent_done) {
			int sec;

			sec = (gethrtime() - ds->start) / 1000 / 1000 / 1000;
			uprintf("^\r%2d:%02d %3d%% done", sec / 60, sec % 60,
			    ++ds->percent_done);
			if (!panicstr)
				delay(1); /* let the output be sent */
		}
	}

	ds->elapsed = gethrtime() - ds->start;
	if (ds->elapsed < 1)
		ds->elapsed = 1;

	/* record actual pages dumped */
	dumphdr->dump_npages = ds->npages;

	/* platform-specific data */
1222 dumphdr->dump_npages += dump_plat_data(dumpcfg.page); 1223 1224 /* note any errors by clearing DF_COMPLETE */ 1225 if (dump_ioerr || ds->npages < dumphdr->dump_npages) 1226 dumphdr->dump_flags &= ~DF_COMPLETE; 1227 1228 /* end of stream blocks */ 1229 datatag = 0; 1230 dumpvp_write(&datatag, sizeof (datatag)); 1231 1232 bzero(&datahdr, sizeof (datahdr)); 1233 1234 /* buffer for metrics */ 1235 buf = dumpcfg.page; 1236 size = MIN(PAGESIZE, DUMP_OFFSET - sizeof (dumphdr_t) - 1237 sizeof (dumpdatahdr_t)); 1238 1239 /* finish the kmem intercepts, collect kmem verbose info */ 1240 if (panicstr) { 1241 datahdr.dump_metrics = kmem_dump_finish(buf, size); 1242 buf += datahdr.dump_metrics; 1243 size -= datahdr.dump_metrics; 1244 } 1245 1246 /* record in the header whether this is a fault-management panic */ 1247 if (panicstr) 1248 dumphdr->dump_fm_panic = is_fm_panic(); 1249 1250 /* compression info in data header */ 1251 datahdr.dump_datahdr_magic = DUMP_DATAHDR_MAGIC; 1252 datahdr.dump_datahdr_version = DUMP_DATAHDR_VERSION; 1253 datahdr.dump_maxcsize = PAGESIZE; 1254 datahdr.dump_maxrange = 1; 1255 datahdr.dump_nstreams = 1; 1256 datahdr.dump_clevel = 0; 1257 #ifdef COLLECT_METRICS 1258 if (dump_metrics_on) 1259 datahdr.dump_metrics += dumpsys_metrics(ds, buf, size); 1260 #endif 1261 datahdr.dump_data_csize = dumpvp_flush() - dumphdr->dump_data; 1262 1263 /* 1264 * Write out the initial and terminal dump headers. 
 */
	/* Initial header at the front of the dump area. */
	dumpbuf.vp_off = dumphdr->dump_start;
	dumpvp_write(dumphdr, sizeof (dumphdr_t));
	(void) dumpvp_flush();

	/*
	 * Terminal header, data header, and metrics buffer at the
	 * DUMP_OFFSET-reserved region at the end of the dump device.
	 */
	dumpbuf.vp_limit = dumpvp_size;
	dumpbuf.vp_off = dumpbuf.vp_limit - DUMP_OFFSET;
	dumpvp_write(dumphdr, sizeof (dumphdr_t));
	dumpvp_write(&datahdr, sizeof (dumpdatahdr_t));
	dumpvp_write(dumpcfg.page, datahdr.dump_metrics);

	(void) dumpvp_flush();

	uprintf("\r%3d%% done: %llu pages dumped, ",
	    ds->percent_done, (u_longlong_t)ds->npages);

	if (dump_ioerr == 0) {
		uprintf("dump succeeded\n");
	} else {
		uprintf("dump failed: error %d\n", dump_ioerr);
#ifdef DEBUG
		if (panicstr)
			debug_enter("dump failed");
#endif
	}

	/*
	 * Write out all undelivered messages. This has to be the *last*
	 * thing we do because the dump process itself emits messages.
	 */
	if (panicstr) {
		dump_summary();
		dump_ereports();
		dump_messages();
	}

	delay(2 * hz);	/* let people see the 'done' message */
	dump_timeleft = 0;
	dump_ioerr = 0;

	/* restore settings after live dump completes */
	if (!panicstr) {
		/* release any VCHR open of the dump device */
		if (dumpbuf.cdev_vp != NULL) {
			(void) VOP_CLOSE(dumpbuf.cdev_vp, FREAD | FWRITE, 1, 0,
			    kcred, NULL);
			VN_RELE(dumpbuf.cdev_vp);
			dumpbuf.cdev_vp = NULL;
		}
	}
}

/*
 * This function is called whenever the memory size, as represented
 * by the phys_install list, changes.  It rebuilds the dump header,
 * resizes the dump I/O buffer, and recomputes the compression level,
 * all under dump_lock.
 */
void
dump_resize()
{
	mutex_enter(&dump_lock);
	dumphdr_init();
	dumpbuf_resize();
	dump_update_clevel();
	mutex_exit(&dump_lock);
}

/*
 * This function allows for dynamic resizing of a dump area.  It assumes that
 * the underlying device has updated its appropriate size(9P) property.
1334 */ 1335 int 1336 dumpvp_resize() 1337 { 1338 int error; 1339 vattr_t vattr; 1340 1341 mutex_enter(&dump_lock); 1342 vattr.va_mask = AT_SIZE; 1343 if ((error = VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL)) != 0) { 1344 mutex_exit(&dump_lock); 1345 return (error); 1346 } 1347 1348 if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE) { 1349 mutex_exit(&dump_lock); 1350 return (ENOSPC); 1351 } 1352 1353 dumpvp_size = vattr.va_size & -DUMP_OFFSET; 1354 mutex_exit(&dump_lock); 1355 return (0); 1356 } 1357 1358 int 1359 dump_set_uuid(const char *uuidstr) 1360 { 1361 const char *ptr; 1362 int i; 1363 1364 if (uuidstr == NULL || strnlen(uuidstr, 36 + 1) != 36) 1365 return (EINVAL); 1366 1367 /* uuid_parse is not common code so check manually */ 1368 for (i = 0, ptr = uuidstr; i < 36; i++, ptr++) { 1369 switch (i) { 1370 case 8: 1371 case 13: 1372 case 18: 1373 case 23: 1374 if (*ptr != '-') 1375 return (EINVAL); 1376 break; 1377 1378 default: 1379 if (!isxdigit(*ptr)) 1380 return (EINVAL); 1381 break; 1382 } 1383 } 1384 1385 if (dump_osimage_uuid[0] != '\0') 1386 return (EALREADY); 1387 1388 (void) strncpy(dump_osimage_uuid, uuidstr, 36 + 1); 1389 1390 cmn_err(CE_CONT, "?This Solaris instance has UUID %s\n", 1391 dump_osimage_uuid); 1392 1393 return (0); 1394 } 1395 1396 const char * 1397 dump_get_uuid(void) 1398 { 1399 return (dump_osimage_uuid[0] != '\0' ? dump_osimage_uuid : ""); 1400 }