1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * Copyright 2011 Jason King. All rights reserved. 27 * Copyright 2012 Joshua M. Clulow <josh@sysmgr.org> 28 * Copyright 2015 Josef 'Jeff' Sipek <jeffpc@josefsipek.net> 29 */ 30 31 #include <ctype.h> 32 #include <getopt.h> 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <string.h> 36 #include <sys/sysmacros.h> 37 #include <sys/elf_SPARC.h> 38 39 #include <libdisasm.h> 40 41 #include "dis_target.h" 42 #include "dis_util.h" 43 #include "dis_list.h" 44 45 int g_demangle; /* Demangle C++ names */ 46 int g_quiet; /* Quiet mode */ 47 int g_numeric; /* Numeric mode */ 48 int g_flags; /* libdisasm language flags */ 49 int g_doall; /* true if no functions or sections were given */ 50 51 dis_namelist_t *g_funclist; /* list of functions to disassemble, if any */ 52 dis_namelist_t *g_seclist; /* list of sections to disassemble, if any */ 53 54 /* 55 * Section options for -d, -D, and -s 56 */ 57 #define DIS_DATA_RELATIVE 1 58 #define DIS_DATA_ABSOLUTE 2 59 #define DIS_TEXT 3 60 61 /* 62 * libdisasm callback data. Keeps track of current data (function or section) 63 * and offset within that data. 64 */ 65 typedef struct dis_buffer { 66 dis_tgt_t *db_tgt; /* current dis target */ 67 void *db_data; /* function or section data */ 68 uint64_t db_addr; /* address of function start */ 69 size_t db_size; /* size of data */ 70 uint64_t db_nextaddr; /* next address to be read */ 71 } dis_buffer_t; 72 73 #define MINSYMWIDTH 22 /* Minimum width of symbol portion of line */ 74 75 /* 76 * Given a symbol+offset as returned by dis_tgt_lookup(), print an appropriately 77 * formatted symbol, based on the offset and current setttings. 78 */ 79 void 80 getsymname(uint64_t addr, const char *symbol, off_t offset, char *buf, 81 size_t buflen) 82 { 83 if (symbol == NULL || g_numeric) { 84 if (g_flags & DIS_OCTAL) 85 (void) snprintf(buf, buflen, "0%llo", addr); 86 else 87 (void) snprintf(buf, buflen, "0x%llx", addr); 88 } else { 89 if (g_demangle) 90 symbol = dis_demangle(symbol); 91 92 if (offset == 0) 93 (void) snprintf(buf, buflen, "%s", symbol); 94 else if (g_flags & DIS_OCTAL) 95 (void) snprintf(buf, buflen, "%s+0%o", symbol, offset); 96 else 97 (void) snprintf(buf, buflen, "%s+0x%x", symbol, offset); 98 } 99 } 100 101 /* 102 * Determine if we are on an architecture with fixed-size instructions, 103 * and if so, what size they are. 104 */ 105 static int 106 insn_size(dis_handle_t *dhp) 107 { 108 int min = dis_min_instrlen(dhp); 109 int max = dis_max_instrlen(dhp); 110 111 if (min == max) 112 return (min); 113 114 return (0); 115 } 116 117 /* 118 * The main disassembly routine. Given a fixed-sized buffer and starting 119 * address, disassemble the data using the supplied target and libdisasm handle. 120 */ 121 void 122 dis_data(dis_tgt_t *tgt, dis_handle_t *dhp, uint64_t addr, void *data, 123 size_t datalen) 124 { 125 dis_buffer_t db = { 0 }; 126 char buf[BUFSIZE]; 127 char symbuf[BUFSIZE]; 128 const char *symbol; 129 const char *last_symbol; 130 off_t symoffset; 131 int i; 132 int bytesperline; 133 size_t symsize; 134 int isfunc; 135 size_t symwidth = 0; 136 int ret; 137 int insz = insn_size(dhp); 138 139 db.db_tgt = tgt; 140 db.db_data = data; 141 db.db_addr = addr; 142 db.db_size = datalen; 143 144 dis_set_data(dhp, &db); 145 146 if ((bytesperline = dis_max_instrlen(dhp)) > 6) 147 bytesperline = 6; 148 149 symbol = NULL; 150 151 while (addr < db.db_addr + db.db_size) { 152 153 ret = dis_disassemble(dhp, addr, buf, BUFSIZE); 154 if (ret != 0 && insz > 0) { 155 /* 156 * Since we know instructions are fixed size, we 157 * always know the address of the next instruction 158 */ 159 (void) snprintf(buf, sizeof (buf), 160 "*** invalid opcode ***"); 161 db.db_nextaddr = addr + insz; 162 163 } else if (ret != 0) { 164 off_t next; 165 166 (void) snprintf(buf, sizeof (buf), 167 "*** invalid opcode ***"); 168 169 /* 170 * On architectures with variable sized instructions 171 * we have no way to figure out where the next 172 * instruction starts if we encounter an invalid 173 * instruction. Instead we print the rest of the 174 * instruction stream as hex until we reach the 175 * next valid symbol in the section. 176 */ 177 if ((next = dis_tgt_next_symbol(tgt, addr)) == 0) { 178 db.db_nextaddr = db.db_addr + db.db_size; 179 } else { 180 if (next > db.db_size) 181 db.db_nextaddr = db.db_addr + 182 db.db_size; 183 else 184 db.db_nextaddr = addr + next; 185 } 186 } 187 188 /* 189 * Print out the line as: 190 * 191 * address: bytes text 192 * 193 * If there are more than 6 bytes in any given instruction, 194 * spread the bytes across two lines. We try to get symbolic 195 * information for the address, but if that fails we print out 196 * the numeric address instead. 197 * 198 * We try to keep the address portion of the text aligned at 199 * MINSYMWIDTH characters. If we are disassembling a function 200 * with a long name, this can be annoying. So we pick a width 201 * based on the maximum width that the current symbol can be. 202 * This at least produces text aligned within each function. 203 */ 204 last_symbol = symbol; 205 symbol = dis_tgt_lookup(tgt, addr, &symoffset, 1, &symsize, 206 &isfunc); 207 if (symbol == NULL) { 208 symbol = dis_find_section(tgt, addr, &symoffset); 209 symsize = symoffset; 210 } 211 212 if (symbol != last_symbol) 213 getsymname(addr, symbol, symsize, symbuf, 214 sizeof (symbuf)); 215 216 symwidth = MAX(symwidth, strlen(symbuf)); 217 getsymname(addr, symbol, symoffset, symbuf, sizeof (symbuf)); 218 219 /* 220 * If we've crossed a new function boundary, print out the 221 * function name on a blank line. 222 */ 223 if (!g_quiet && symoffset == 0 && symbol != NULL && isfunc) 224 (void) printf("%s()\n", symbol); 225 226 (void) printf(" %s:%*s ", symbuf, 227 symwidth - strlen(symbuf), ""); 228 229 /* print bytes */ 230 for (i = 0; i < MIN(bytesperline, (db.db_nextaddr - addr)); 231 i++) { 232 int byte = *((uchar_t *)data + (addr - db.db_addr) + i); 233 if (g_flags & DIS_OCTAL) 234 (void) printf("%03o ", byte); 235 else 236 (void) printf("%02x ", byte); 237 } 238 239 /* trailing spaces for missing bytes */ 240 for (; i < bytesperline; i++) { 241 if (g_flags & DIS_OCTAL) 242 (void) printf(" "); 243 else 244 (void) printf(" "); 245 } 246 247 /* contents of disassembly */ 248 (void) printf(" %s", buf); 249 250 /* excess bytes that spill over onto subsequent lines */ 251 for (; i < db.db_nextaddr - addr; i++) { 252 int byte = *((uchar_t *)data + (addr - db.db_addr) + i); 253 if (i % bytesperline == 0) 254 (void) printf("\n %*s ", symwidth, ""); 255 if (g_flags & DIS_OCTAL) 256 (void) printf("%03o ", byte); 257 else 258 (void) printf("%02x ", byte); 259 } 260 261 (void) printf("\n"); 262 263 addr = db.db_nextaddr; 264 } 265 } 266 267 /* 268 * libdisasm wrapper around symbol lookup. Invoke the target-specific lookup 269 * function, and convert the result using getsymname(). 270 */ 271 int 272 do_lookup(void *data, uint64_t addr, char *buf, size_t buflen, uint64_t *start, 273 size_t *symlen) 274 { 275 dis_buffer_t *db = data; 276 const char *symbol; 277 off_t offset; 278 size_t size; 279 280 /* 281 * If NULL symbol is returned, getsymname takes care of 282 * printing appropriate address in buf instead of symbol. 283 */ 284 symbol = dis_tgt_lookup(db->db_tgt, addr, &offset, 0, &size, NULL); 285 286 if (buf != NULL) 287 getsymname(addr, symbol, offset, buf, buflen); 288 289 if (start != NULL) 290 *start = addr - offset; 291 if (symlen != NULL) 292 *symlen = size; 293 294 if (symbol == NULL) 295 return (-1); 296 297 return (0); 298 } 299 300 /* 301 * libdisasm wrapper around target reading. libdisasm will always read data 302 * in order, so update our current offset within the buffer appropriately. 303 * We only support reading from within the current object; libdisasm should 304 * never ask us to do otherwise. 305 */ 306 int 307 do_read(void *data, uint64_t addr, void *buf, size_t len) 308 { 309 dis_buffer_t *db = data; 310 size_t offset; 311 312 if (addr < db->db_addr || addr >= db->db_addr + db->db_size) 313 return (-1); 314 315 offset = addr - db->db_addr; 316 len = MIN(len, db->db_size - offset); 317 318 (void) memcpy(buf, (char *)db->db_data + offset, len); 319 320 db->db_nextaddr = addr + len; 321 322 return (len); 323 } 324 325 /* 326 * Routine to dump raw data in a human-readable format. Used by the -d and -D 327 * options. We model our output after the xxd(1) program, which gives nicely 328 * formatted output, along with an ASCII translation of the result. 329 */ 330 void 331 dump_data(uint64_t addr, void *data, size_t datalen) 332 { 333 uintptr_t curaddr = addr & (~0xf); 334 uint8_t *bytes = data; 335 int i; 336 int width; 337 338 /* 339 * Determine if the address given to us fits in 32-bit range, in which 340 * case use a 4-byte width. 341 */ 342 if (((addr + datalen) & 0xffffffff00000000ULL) == 0ULL) 343 width = 8; 344 else 345 width = 16; 346 347 while (curaddr < addr + datalen) { 348 /* 349 * Display leading address 350 */ 351 (void) printf("%0*x: ", width, curaddr); 352 353 /* 354 * Print out data in two-byte chunks. If the current address 355 * is before the starting address or after the end of the 356 * section, print spaces. 357 */ 358 for (i = 0; i < 16; i++) { 359 if (curaddr + i < addr ||curaddr + i >= addr + datalen) 360 (void) printf(" "); 361 else 362 (void) printf("%02x", 363 bytes[curaddr + i - addr]); 364 365 if (i & 1) 366 (void) printf(" "); 367 } 368 369 (void) printf(" "); 370 371 /* 372 * Print out the ASCII representation 373 */ 374 for (i = 0; i < 16; i++) { 375 if (curaddr + i < addr || 376 curaddr + i >= addr + datalen) { 377 (void) printf(" "); 378 } else { 379 uint8_t byte = bytes[curaddr + i - addr]; 380 if (isprint(byte)) 381 (void) printf("%c", byte); 382 else 383 (void) printf("."); 384 } 385 } 386 387 (void) printf("\n"); 388 389 curaddr += 16; 390 } 391 } 392 393 /* 394 * Disassemble a section implicitly specified as part of a file. This function 395 * is called for all sections when no other flags are specified. We ignore any 396 * data sections, and print out only those sections containing text. 397 */ 398 void 399 dis_text_section(dis_tgt_t *tgt, dis_scn_t *scn, void *data) 400 { 401 dis_handle_t *dhp = data; 402 403 /* ignore data sections */ 404 if (!dis_section_istext(scn)) 405 return; 406 407 if (!g_quiet) 408 (void) printf("\nsection %s\n", dis_section_name(scn)); 409 410 dis_data(tgt, dhp, dis_section_addr(scn), dis_section_data(scn), 411 dis_section_size(scn)); 412 } 413 414 /* 415 * Structure passed to dis_named_{section,function} which keeps track of both 416 * the target and the libdisasm handle. 417 */ 418 typedef struct callback_arg { 419 dis_tgt_t *ca_tgt; 420 dis_handle_t *ca_handle; 421 } callback_arg_t; 422 423 /* 424 * Disassemble a section explicitly named with -s, -d, or -D. The 'type' 425 * argument contains the type of argument given. Pass the data onto the 426 * appropriate helper routine. 427 */ 428 void 429 dis_named_section(dis_scn_t *scn, int type, void *data) 430 { 431 callback_arg_t *ca = data; 432 433 if (!g_quiet) 434 (void) printf("\nsection %s\n", dis_section_name(scn)); 435 436 switch (type) { 437 case DIS_DATA_RELATIVE: 438 dump_data(0, dis_section_data(scn), dis_section_size(scn)); 439 break; 440 case DIS_DATA_ABSOLUTE: 441 dump_data(dis_section_addr(scn), dis_section_data(scn), 442 dis_section_size(scn)); 443 break; 444 case DIS_TEXT: 445 dis_data(ca->ca_tgt, ca->ca_handle, dis_section_addr(scn), 446 dis_section_data(scn), dis_section_size(scn)); 447 break; 448 } 449 } 450 451 /* 452 * Disassemble a function explicitly specified with '-F'. The 'type' argument 453 * is unused. 454 */ 455 /* ARGSUSED */ 456 void 457 dis_named_function(dis_func_t *func, int type, void *data) 458 { 459 callback_arg_t *ca = data; 460 461 dis_data(ca->ca_tgt, ca->ca_handle, dis_function_addr(func), 462 dis_function_data(func), dis_function_size(func)); 463 } 464 465 /* 466 * Disassemble a complete file. First, we determine the type of the file based 467 * on the ELF machine type, and instantiate a version of the disassembler 468 * appropriate for the file. We then resolve any named sections or functions 469 * against the file, and iterate over the results (or all sections if no flags 470 * were specified). 471 */ 472 void 473 dis_file(const char *filename) 474 { 475 dis_tgt_t *tgt, *current; 476 dis_scnlist_t *sections; 477 dis_funclist_t *functions; 478 dis_handle_t *dhp; 479 GElf_Ehdr ehdr; 480 481 /* 482 * First, initialize the target 483 */ 484 if ((tgt = dis_tgt_create(filename)) == NULL) 485 return; 486 487 if (!g_quiet) 488 (void) printf("disassembly for %s\n\n", filename); 489 490 /* 491 * A given file may contain multiple targets (if it is an archive, for 492 * example). We iterate over all possible targets if this is the case. 493 */ 494 for (current = tgt; current != NULL; current = dis_tgt_next(current)) { 495 dis_tgt_ehdr(current, &ehdr); 496 497 /* 498 * Eventually, this should probably live within libdisasm, and 499 * we should be able to disassemble targets from different 500 * architectures. For now, we only support objects as the 501 * native machine type. 502 */ 503 switch (ehdr.e_machine) { 504 case EM_SPARC: 505 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 || 506 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 507 warn("invalid E_IDENT field for SPARC object"); 508 return; 509 } 510 g_flags |= DIS_SPARC_V8; 511 break; 512 513 case EM_SPARC32PLUS: 514 { 515 uint64_t flags = ehdr.e_flags & EF_SPARC_32PLUS_MASK; 516 517 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 || 518 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 519 warn("invalid E_IDENT field for SPARC object"); 520 return; 521 } 522 523 if (flags != 0 && 524 (flags & (EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | 525 EF_SPARC_SUN_US3)) != EF_SPARC_32PLUS) 526 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI; 527 else 528 g_flags |= DIS_SPARC_V9; 529 break; 530 } 531 532 case EM_SPARCV9: 533 if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 || 534 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 535 warn("invalid E_IDENT field for SPARC object"); 536 return; 537 } 538 539 g_flags |= DIS_SPARC_V9 | DIS_SPARC_V9_SGI; 540 break; 541 542 case EM_386: 543 g_flags |= DIS_X86_SIZE32; 544 break; 545 546 case EM_AMD64: 547 g_flags |= DIS_X86_SIZE64; 548 break; 549 550 case EM_S370: 551 g_flags |= DIS_S370; 552 553 if (ehdr.e_ident[EI_CLASS] != ELFCLASS32 || 554 ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 555 warn("invalid E_IDENT field for S370 object"); 556 return; 557 } 558 break; 559 560 case EM_S390: 561 if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) { 562 g_flags |= DIS_S390_31; 563 } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) { 564 g_flags |= DIS_S390_64; 565 } else { 566 warn("invalid E_IDENT field for S390 object"); 567 return; 568 } 569 570 if (ehdr.e_ident[EI_DATA] != ELFDATA2MSB) { 571 warn("invalid E_IDENT field for S390 object"); 572 return; 573 } 574 break; 575 576 default: 577 die("%s: unsupported ELF machine 0x%x", filename, 578 ehdr.e_machine); 579 } 580 581 /* 582 * If ET_REL (.o), printing immediate symbols is likely to 583 * result in garbage, as symbol lookups on unrelocated 584 * immediates find false and useless matches. 585 */ 586 587 if (ehdr.e_type == ET_REL) 588 g_flags |= DIS_NOIMMSYM; 589 590 if (!g_quiet && dis_tgt_member(current) != NULL) 591 (void) printf("\narchive member %s\n", 592 dis_tgt_member(current)); 593 594 /* 595 * Instantiate a libdisasm handle based on the file type. 596 */ 597 if ((dhp = dis_handle_create(g_flags, current, do_lookup, 598 do_read)) == NULL) 599 die("%s: failed to initialize disassembler: %s", 600 filename, dis_strerror(dis_errno())); 601 602 if (g_doall) { 603 /* 604 * With no arguments, iterate over all sections and 605 * disassemble only those that contain text. 606 */ 607 dis_tgt_section_iter(current, dis_text_section, dhp); 608 } else { 609 callback_arg_t ca; 610 611 ca.ca_tgt = current; 612 ca.ca_handle = dhp; 613 614 /* 615 * If sections or functions were explicitly specified, 616 * resolve those names against the object, and iterate 617 * over just the resulting data. 618 */ 619 sections = dis_namelist_resolve_sections(g_seclist, 620 current); 621 functions = dis_namelist_resolve_functions(g_funclist, 622 current); 623 624 dis_scnlist_iter(sections, dis_named_section, &ca); 625 dis_funclist_iter(functions, dis_named_function, &ca); 626 627 dis_scnlist_destroy(sections); 628 dis_funclist_destroy(functions); 629 } 630 631 dis_handle_destroy(dhp); 632 } 633 634 dis_tgt_destroy(tgt); 635 } 636 637 void 638 usage(void) 639 { 640 (void) fprintf(stderr, "usage: dis [-CVoqn] [-d sec] \n"); 641 (void) fprintf(stderr, "\t[-D sec] [-F function] [-t sec] file ..\n"); 642 exit(2); 643 } 644 645 typedef struct lib_node { 646 char *path; 647 struct lib_node *next; 648 } lib_node_t; 649 650 int 651 main(int argc, char **argv) 652 { 653 int optchar; 654 int i; 655 lib_node_t *libs = NULL; 656 657 g_funclist = dis_namelist_create(); 658 g_seclist = dis_namelist_create(); 659 660 while ((optchar = getopt(argc, argv, "Cd:D:F:l:Lot:Vqn")) != -1) { 661 switch (optchar) { 662 case 'C': 663 g_demangle = 1; 664 break; 665 case 'd': 666 dis_namelist_add(g_seclist, optarg, DIS_DATA_RELATIVE); 667 break; 668 case 'D': 669 dis_namelist_add(g_seclist, optarg, DIS_DATA_ABSOLUTE); 670 break; 671 case 'F': 672 dis_namelist_add(g_funclist, optarg, 0); 673 break; 674 case 'l': { 675 /* 676 * The '-l foo' option historically would attempt to 677 * disassemble '$LIBDIR/libfoo.a'. The $LIBDIR 678 * environment variable has never been supported or 679 * documented for our linker. However, until this 680 * option is formally EOLed, we have to support it. 681 */ 682 char *dir; 683 lib_node_t *node; 684 size_t len; 685 686 if ((dir = getenv("LIBDIR")) == NULL || 687 dir[0] == '\0') 688 dir = "/usr/lib"; 689 node = safe_malloc(sizeof (lib_node_t)); 690 len = strlen(optarg) + strlen(dir) + sizeof ("/lib.a"); 691 node->path = safe_malloc(len); 692 693 (void) snprintf(node->path, len, "%s/lib%s.a", dir, 694 optarg); 695 node->next = libs; 696 libs = node; 697 break; 698 } 699 case 'L': 700 /* 701 * The '-L' option historically would attempt to read 702 * the .debug section of the target to determine source 703 * line information in order to annotate the output. 704 * No compiler has emitted these sections in many years, 705 * and the option has never done what it purported to 706 * do. We silently consume the option for 707 * compatibility. 708 */ 709 break; 710 case 'n': 711 g_numeric = 1; 712 break; 713 case 'o': 714 g_flags |= DIS_OCTAL; 715 break; 716 case 'q': 717 g_quiet = 1; 718 break; 719 case 't': 720 dis_namelist_add(g_seclist, optarg, DIS_TEXT); 721 break; 722 case 'V': 723 (void) printf("Solaris disassembler version 1.0\n"); 724 return (0); 725 default: 726 usage(); 727 break; 728 } 729 } 730 731 argc -= optind; 732 argv += optind; 733 734 if (argc == 0 && libs == NULL) { 735 warn("no objects specified"); 736 usage(); 737 } 738 739 if (dis_namelist_empty(g_funclist) && dis_namelist_empty(g_seclist)) 740 g_doall = 1; 741 742 /* 743 * See comment for 'l' option, above. 744 */ 745 while (libs != NULL) { 746 lib_node_t *node = libs->next; 747 748 dis_file(libs->path); 749 free(libs->path); 750 free(libs); 751 libs = node; 752 } 753 754 for (i = 0; i < argc; i++) 755 dis_file(argv[i]); 756 757 dis_namelist_destroy(g_funclist); 758 dis_namelist_destroy(g_seclist); 759 760 return (g_error); 761 }