Print this page
3317 dis(1) should support cross-target disassembly
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/cmd/dis/dis_target.c
+++ new/usr/src/cmd/dis/dis_target.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 *
25 25 * Copyright 2011 Jason King. All rights reserved.
26 26 */
27 27
28 28 #include <assert.h>
29 29 #include <errno.h>
30 30 #include <fcntl.h>
31 31 #include <gelf.h>
32 32 #include <libelf.h>
33 33 #include <stdlib.h>
34 34 #include <string.h>
35 35 #include <unistd.h>
36 36
37 37 #include <sys/fcntl.h>
38 38 #include <sys/stat.h>
39 39 #include <sys/sysmacros.h>
40 40 #include <sys/types.h>
41 41
42 42 #include "dis_target.h"
43 43 #include "dis_util.h"
44 44
45 45 /*
46 46 * Standard ELF disassembler target.
47 47 *
48 48 * We only support disassembly of ELF files, though this target interface could
49 49 * be extended in the future. Each basic type (target, func, section) contains
50 50 * enough information to uniquely identify the location within the file. The
51 51 * interfaces use libelf(3LIB) to do the actual processing of the file.
52 52 */
53 53
54 54 /*
55 55 * Symbol table entry type. We maintain our own symbol table sorted by address,
56 56 * with the symbol name already resolved against the ELF symbol table.
57 57 */
58 58 typedef struct sym_entry {
59 59 GElf_Sym se_sym; /* value of symbol */
60 60 char *se_name; /* name of symbol */
61 61 int se_shndx; /* section where symbol is located */
62 62 } sym_entry_t;
63 63
64 64 /*
65 65 * Create a map of the virtual address ranges of every section. This will
66 66 * allow us to create dummpy mappings for unassigned addresses. Otherwise
67 67 * multiple sections with unassigned addresses will appear to overlap and
68 68 * mess up symbol resolution (which uses the virtual address).
69 69 */
70 70 typedef struct dis_shnmap {
71 71 const char *dm_name; /* name of section */
72 72 uint64_t dm_start; /* virtual address of section */
73 73 size_t dm_length; /* address length */
74 74 boolean_t dm_mapped; /* did we assign the mapping */
75 75 } dis_shnmap_t;
76 76
77 77 /*
78 78 * Target data structure. This structure keeps track of the ELF file
79 79 * information, a few bits of pre-processed section index information, and
80 80 * sorted versions of the symbol table. We also keep track of the last symbol
81 81 * looked up, as the majority of lookups remain within the same symbol.
82 82 */
83 83 struct dis_tgt {
84 84 Elf *dt_elf; /* libelf handle */
85 85 Elf *dt_elf_root; /* main libelf handle (for archives) */
86 86 const char *dt_filename; /* name of file */
87 87 int dt_fd; /* underlying file descriptor */
88 88 size_t dt_shstrndx; /* section index of .shstrtab */
89 89 size_t dt_symidx; /* section index of symbol table */
90 90 sym_entry_t *dt_symcache; /* last symbol looked up */
91 91 sym_entry_t *dt_symtab; /* sorted symbol table */
92 92 int dt_symcount; /* # of symbol table entries */
93 93 struct dis_tgt *dt_next; /* next target (for archives) */
94 94 Elf_Arhdr *dt_arhdr; /* archive header (for archives) */
95 95 dis_shnmap_t *dt_shnmap; /* section address map */
96 96 size_t dt_shncount; /* # of sections in target */
97 97 };
98 98
99 99 /*
100 100 * Function data structure. We resolve the symbol and lookup the associated ELF
101 101 * data when building this structure. The offset is calculated based on the
102 102 * section's starting address.
103 103 */
104 104 struct dis_func {
105 105 sym_entry_t *df_sym; /* symbol table reference */
106 106 Elf_Data *df_data; /* associated ELF data */
107 107 size_t df_offset; /* offset within data */
108 108 };
109 109
110 110 /*
111 111 * Section data structure. We store the entire section header so that we can
112 112 * determine some properties (such as whether or not it contains text) after
113 113 * building the structure.
114 114 */
115 115 struct dis_scn {
116 116 GElf_Shdr ds_shdr;
117 117 const char *ds_name;
118 118 Elf_Data *ds_data;
119 119 };
120 120
/*
 * Bitmask of the symbol types we keep, one bit per STT_* value.
 * Lifted from Psymtab.c, omitting STT_TLS.
 */
#define	DATA_TYPES	\
	((1 << STT_COMMON) | (1 << STT_FUNC) | (1 << STT_OBJECT))
/* Non-zero when symbol type 'tp' is one of the DATA_TYPES. */
#define	IS_DATA_TYPE(tp)	((DATA_TYPES & (1 << (tp))) != 0)
125 125
126 126 /*
127 127 * Save the virtual address range for this section and select the
128 128 * best section to use as the symbol table. We prefer SHT_SYMTAB
129 129 * over SHT_DYNSYM.
130 130 */
131 131 /* ARGSUSED */
132 132 static void
133 133 tgt_scn_init(dis_tgt_t *tgt, dis_scn_t *scn, void *data)
134 134 {
135 135 int *index = data;
136 136
137 137 *index += 1;
138 138
139 139 tgt->dt_shnmap[*index].dm_name = scn->ds_name;
140 140 tgt->dt_shnmap[*index].dm_start = scn->ds_shdr.sh_addr;
141 141 tgt->dt_shnmap[*index].dm_length = scn->ds_shdr.sh_size;
142 142 tgt->dt_shnmap[*index].dm_mapped = B_FALSE;
143 143
144 144 /*
145 145 * Prefer SHT_SYMTAB over SHT_DYNSYM
146 146 */
147 147 if (scn->ds_shdr.sh_type == SHT_DYNSYM && tgt->dt_symidx == 0)
148 148 tgt->dt_symidx = *index;
149 149 else if (scn->ds_shdr.sh_type == SHT_SYMTAB)
150 150 tgt->dt_symidx = *index;
151 151 }
152 152
153 153 static int
154 154 sym_compare(const void *a, const void *b)
155 155 {
156 156 const sym_entry_t *syma = a;
157 157 const sym_entry_t *symb = b;
158 158 const char *aname = syma->se_name;
159 159 const char *bname = symb->se_name;
160 160
161 161 if (syma->se_sym.st_value < symb->se_sym.st_value)
162 162 return (-1);
163 163
164 164 if (syma->se_sym.st_value > symb->se_sym.st_value)
165 165 return (1);
166 166
167 167 /*
168 168 * Prefer functions over non-functions
169 169 */
170 170 if (GELF_ST_TYPE(syma->se_sym.st_info) !=
171 171 GELF_ST_TYPE(symb->se_sym.st_info)) {
172 172 if (GELF_ST_TYPE(syma->se_sym.st_info) == STT_FUNC)
173 173 return (-1);
174 174 if (GELF_ST_TYPE(symb->se_sym.st_info) == STT_FUNC)
175 175 return (1);
176 176 }
177 177
178 178 /*
179 179 * For symbols with the same address and type, we sort them according to
180 180 * a hierarchy:
181 181 *
182 182 * 1. weak symbols (common name)
183 183 * 2. global symbols (external name)
184 184 * 3. local symbols
185 185 */
186 186 if (GELF_ST_BIND(syma->se_sym.st_info) !=
187 187 GELF_ST_BIND(symb->se_sym.st_info)) {
188 188 if (GELF_ST_BIND(syma->se_sym.st_info) == STB_WEAK)
189 189 return (-1);
190 190 if (GELF_ST_BIND(symb->se_sym.st_info) == STB_WEAK)
191 191 return (1);
192 192
193 193 if (GELF_ST_BIND(syma->se_sym.st_info) == STB_GLOBAL)
194 194 return (-1);
195 195 if (GELF_ST_BIND(symb->se_sym.st_info) == STB_GLOBAL)
196 196 return (1);
197 197 }
198 198
199 199 /*
200 200 * As a last resort, if we have multiple symbols of the same type at the
201 201 * same address, prefer the version with the fewest leading underscores.
202 202 */
203 203 if (aname == NULL)
204 204 return (-1);
205 205 if (bname == NULL)
206 206 return (1);
207 207
208 208 while (*aname == '_' && *bname == '_') {
209 209 aname++;
210 210 bname++;
211 211 }
212 212
213 213 if (*bname == '_')
214 214 return (-1);
215 215 if (*aname == '_')
216 216 return (1);
217 217
218 218 /*
219 219 * Prefer the symbol with the smaller size.
220 220 */
221 221 if (syma->se_sym.st_size < symb->se_sym.st_size)
222 222 return (-1);
223 223 if (syma->se_sym.st_size > symb->se_sym.st_size)
224 224 return (1);
225 225
226 226 /*
227 227 * We really do have two identical symbols for some reason. Just report
228 228 * them as equal, and to the lucky one go the spoils.
229 229 */
230 230 return (0);
231 231 }
232 232
233 233 /*
234 234 * Construct an optimized symbol table sorted by starting address.
235 235 */
236 236 static void
237 237 construct_symtab(dis_tgt_t *tgt)
238 238 {
239 239 Elf_Scn *scn;
240 240 GElf_Shdr shdr;
241 241 Elf_Data *symdata;
242 242 int i;
243 243 GElf_Word *symshndx = NULL;
244 244 int symshndx_size;
245 245 sym_entry_t *sym;
246 246 sym_entry_t *p_symtab = NULL;
247 247 int nsym = 0; /* count of symbols we're not interested in */
248 248
249 249 /*
250 250 * Find the symshndx section, if any
251 251 */
252 252 for (scn = elf_nextscn(tgt->dt_elf, NULL); scn != NULL;
253 253 scn = elf_nextscn(tgt->dt_elf, scn)) {
254 254 if (gelf_getshdr(scn, &shdr) == NULL)
255 255 break;
256 256 if (shdr.sh_type == SHT_SYMTAB_SHNDX &&
257 257 shdr.sh_link == tgt->dt_symidx) {
258 258 Elf_Data *data;
259 259
260 260 if ((data = elf_getdata(scn, NULL)) != NULL) {
261 261 symshndx = (GElf_Word *)data->d_buf;
262 262 symshndx_size = data->d_size /
263 263 sizeof (GElf_Word);
264 264 break;
265 265 }
266 266 }
267 267 }
268 268
269 269 if ((scn = elf_getscn(tgt->dt_elf, tgt->dt_symidx)) == NULL)
270 270 die("%s: failed to get section information", tgt->dt_filename);
271 271 if (gelf_getshdr(scn, &shdr) == NULL)
272 272 die("%s: failed to get section header", tgt->dt_filename);
273 273 if (shdr.sh_entsize == 0)
274 274 die("%s: symbol table has zero size", tgt->dt_filename);
275 275
276 276 if ((symdata = elf_getdata(scn, NULL)) == NULL)
277 277 die("%s: failed to get symbol table", tgt->dt_filename);
278 278
279 279 tgt->dt_symcount = symdata->d_size / gelf_fsize(tgt->dt_elf, ELF_T_SYM,
280 280 1, EV_CURRENT);
281 281
282 282 p_symtab = safe_malloc(tgt->dt_symcount * sizeof (sym_entry_t));
283 283
284 284 for (i = 0, sym = p_symtab; i < tgt->dt_symcount; i++) {
285 285 if (gelf_getsym(symdata, i, &(sym->se_sym)) == NULL) {
286 286 warn("%s: gelf_getsym returned NULL for %d",
287 287 tgt->dt_filename, i);
288 288 nsym++;
289 289 continue;
290 290 }
291 291
292 292 /*
293 293 * We're only interested in data symbols.
294 294 */
295 295 if (!IS_DATA_TYPE(GELF_ST_TYPE(sym->se_sym.st_info))) {
296 296 nsym++;
297 297 continue;
298 298 }
299 299
300 300 if (sym->se_sym.st_shndx == SHN_XINDEX && symshndx != NULL) {
301 301 if (i > symshndx_size) {
302 302 warn("%s: bad SHNX_XINDEX %d",
303 303 tgt->dt_filename, i);
304 304 sym->se_shndx = -1;
305 305 } else {
306 306 sym->se_shndx = symshndx[i];
307 307 }
308 308 } else {
309 309 sym->se_shndx = sym->se_sym.st_shndx;
310 310 }
311 311
312 312 /* Deal with symbols with special section indicies */
313 313 if (sym->se_shndx == SHN_ABS) {
314 314 /*
315 315 * If st_value == 0, references to these
316 316 * symbols in code are modified in situ
317 317 * thus we will never attempt to look
318 318 * them up.
319 319 */
320 320 if (sym->se_sym.st_value == 0) {
321 321 /*
322 322 * References to these symbols in code
323 323 * are modified in situ by the runtime
324 324 * linker and no code on disk will ever
325 325 * attempt to look them up.
326 326 */
327 327 nsym++;
328 328 continue;
329 329 } else {
330 330 /*
331 331 * If st_value != 0, (such as examining
332 332 * something in /system/object/.../object)
333 333 * the values should resolve to a value
334 334 * within an existing section (such as
335 335 * .data). This also means it never needs
336 336 * to have st_value mapped.
337 337 */
338 338 sym++;
339 339 continue;
340 340 }
341 341 }
342 342
343 343 /*
344 344 * Ignore the symbol if it has some other special
345 345 * section index
346 346 */
347 347 if (sym->se_shndx == SHN_UNDEF ||
348 348 sym->se_shndx >= SHN_LORESERVE) {
349 349 nsym++;
350 350 continue;
351 351 }
352 352
353 353 if ((sym->se_name = elf_strptr(tgt->dt_elf, shdr.sh_link,
354 354 (size_t)sym->se_sym.st_name)) == NULL) {
355 355 warn("%s: failed to lookup symbol %d name",
356 356 tgt->dt_filename, i);
357 357 nsym++;
358 358 continue;
359 359 }
360 360
361 361 /*
362 362 * If we had to map this section, its symbol value
363 363 * also needs to be mapped.
364 364 */
365 365 if (tgt->dt_shnmap[sym->se_shndx].dm_mapped)
366 366 sym->se_sym.st_value +=
367 367 tgt->dt_shnmap[sym->se_shndx].dm_start;
368 368
369 369 sym++;
370 370 }
371 371
372 372 tgt->dt_symcount -= nsym;
373 373 tgt->dt_symtab = realloc(p_symtab, tgt->dt_symcount *
374 374 sizeof (sym_entry_t));
375 375
376 376 qsort(tgt->dt_symtab, tgt->dt_symcount, sizeof (sym_entry_t),
377 377 sym_compare);
378 378 }
379 379
380 380 /*
381 381 * Assign virtual address ranges for sections that need it
382 382 */
383 383 static void
384 384 create_addrmap(dis_tgt_t *tgt)
385 385 {
386 386 uint64_t addr;
387 387 int i;
388 388
389 389 if (tgt->dt_shnmap == NULL)
390 390 return;
391 391
392 392 /* find the greatest used address */
393 393 for (addr = 0, i = 1; i < tgt->dt_shncount; i++)
394 394 if (tgt->dt_shnmap[i].dm_start > addr)
395 395 addr = tgt->dt_shnmap[i].dm_start +
396 396 tgt->dt_shnmap[i].dm_length;
397 397
398 398 addr = P2ROUNDUP(addr, 0x1000);
399 399
400 400 /*
401 401 * Assign section a starting address beyond the largest mapped section
402 402 * if no address was given.
403 403 */
404 404 for (i = 1; i < tgt->dt_shncount; i++) {
405 405 if (tgt->dt_shnmap[i].dm_start != 0)
406 406 continue;
407 407
408 408 tgt->dt_shnmap[i].dm_start = addr;
409 409 tgt->dt_shnmap[i].dm_mapped = B_TRUE;
410 410 addr = P2ROUNDUP(addr + tgt->dt_shnmap[i].dm_length, 0x1000);
411 411 }
412 412 }
413 413
414 414 /*
415 415 * Create a target backed by an ELF file.
416 416 */
417 417 dis_tgt_t *
418 418 dis_tgt_create(const char *file)
419 419 {
420 420 dis_tgt_t *tgt, *current;
421 421 int idx;
422 422 Elf *elf;
423 423 GElf_Ehdr ehdr;
424 424 Elf_Arhdr *arhdr = NULL;
425 425 int cmd;
426 426
427 427 if (elf_version(EV_CURRENT) == EV_NONE)
428 428 die("libelf(3ELF) out of date");
429 429
430 430 tgt = safe_malloc(sizeof (dis_tgt_t));
431 431
432 432 if ((tgt->dt_fd = open(file, O_RDONLY)) < 0) {
433 433 warn("%s: failed opening file, reason: %s", file,
434 434 strerror(errno));
435 435 free(tgt);
436 436 return (NULL);
437 437 }
438 438
439 439 if ((tgt->dt_elf_root =
440 440 elf_begin(tgt->dt_fd, ELF_C_READ, NULL)) == NULL) {
441 441 warn("%s: invalid or corrupt ELF file", file);
442 442 dis_tgt_destroy(tgt);
443 443 return (NULL);
444 444 }
445 445
446 446 current = tgt;
447 447 cmd = ELF_C_READ;
448 448 while ((elf = elf_begin(tgt->dt_fd, cmd, tgt->dt_elf_root)) != NULL) {
449 449 size_t shnum = 0;
450 450
451 451 if (elf_kind(tgt->dt_elf_root) == ELF_K_AR &&
452 452 (arhdr = elf_getarhdr(elf)) == NULL) {
453 453 warn("%s: malformed archive", file);
454 454 dis_tgt_destroy(tgt);
455 455 return (NULL);
456 456 }
457 457
458 458 /*
459 459 * Make sure that this Elf file is sane
460 460 */
461 461 if (gelf_getehdr(elf, &ehdr) == NULL) {
462 462 if (arhdr != NULL) {
463 463 /*
464 464 * For archives, we drive on in the face of bad
465 465 * members. The "/" and "//" members are
466 466 * special, and should be silently ignored.
467 467 */
468 468 if (strcmp(arhdr->ar_name, "/") != 0 &&
469 469 strcmp(arhdr->ar_name, "//") != 0)
470 470 warn("%s[%s]: invalid file type",
471 471 file, arhdr->ar_name);
472 472 cmd = elf_next(elf);
473 473 (void) elf_end(elf);
474 474 continue;
475 475 }
476 476
477 477 warn("%s: invalid file type", file);
478 478 dis_tgt_destroy(tgt);
479 479 return (NULL);
480 480 }
481 481
482 482 /*
483 483 * If we're seeing a new Elf object, then we have an
484 484 * archive. In this case, we create a new target, and chain it
485 485 * off the master target. We can later iterate over these
486 486 * targets using dis_tgt_next().
487 487 */
488 488 if (current->dt_elf != NULL) {
489 489 dis_tgt_t *next = safe_malloc(sizeof (dis_tgt_t));
490 490 next->dt_elf_root = tgt->dt_elf_root;
491 491 next->dt_fd = -1;
492 492 current->dt_next = next;
493 493 current = next;
494 494 }
495 495 current->dt_elf = elf;
496 496 current->dt_arhdr = arhdr;
497 497
498 498 if (elf_getshdrstrndx(elf, ¤t->dt_shstrndx) == -1) {
499 499 warn("%s: failed to get section string table for "
500 500 "file", file);
501 501 dis_tgt_destroy(tgt);
502 502 return (NULL);
503 503 }
504 504
505 505 if (elf_getshdrnum(elf, &shnum) == -1) {
506 506 warn("%s: failed to get number of sections in file",
507 507 file);
508 508 dis_tgt_destroy(tgt);
509 509 return (NULL);
510 510 }
511 511
512 512 current->dt_shnmap = safe_malloc(sizeof (dis_shnmap_t) *
513 513 shnum);
514 514 current->dt_shncount = shnum;
515 515
516 516 idx = 0;
517 517 dis_tgt_section_iter(current, tgt_scn_init, &idx);
518 518 current->dt_filename = file;
519 519
520 520 create_addrmap(current);
521 521 if (current->dt_symidx != 0)
522 522 construct_symtab(current);
523 523
524 524 cmd = elf_next(elf);
525 525 }
526 526
527 527 /*
528 528 * Final sanity check. If we had an archive with no members, then bail
529 529 * out with a nice message.
530 530 */
531 531 if (tgt->dt_elf == NULL) {
532 532 warn("%s: empty archive\n", file);
533 533 dis_tgt_destroy(tgt);
534 534 return (NULL);
535 535 }
536 536
537 537 return (tgt);
538 538 }
539 539
540 540 /*
541 541 * Return the filename associated with the target.
542 542 */
543 543 const char *
544 544 dis_tgt_name(dis_tgt_t *tgt)
545 545 {
546 546 return (tgt->dt_filename);
547 547 }
548 548
549 549 /*
550 550 * Return the archive member name, if any.
551 551 */
552 552 const char *
553 553 dis_tgt_member(dis_tgt_t *tgt)
554 554 {
555 555 if (tgt->dt_arhdr)
556 556 return (tgt->dt_arhdr->ar_name);
557 557 else
558 558 return (NULL);
559 559 }
560 560
561 561 /*
562 562 * Return the Elf_Ehdr associated with this target. Needed to determine which
563 563 * disassembler to use.
564 564 */
565 565 void
566 566 dis_tgt_ehdr(dis_tgt_t *tgt, GElf_Ehdr *ehdr)
567 567 {
568 568 (void) gelf_getehdr(tgt->dt_elf, ehdr);
569 569 }
570 570
571 571 /*
572 572 * Return the next target in the list, if this is an archive.
573 573 */
574 574 dis_tgt_t *
575 575 dis_tgt_next(dis_tgt_t *tgt)
576 576 {
577 577 return (tgt->dt_next);
578 578 }
579 579
580 580 /*
581 581 * Destroy a target and free up any associated memory.
582 582 */
583 583 void
584 584 dis_tgt_destroy(dis_tgt_t *tgt)
585 585 {
586 586 dis_tgt_t *current, *next;
587 587
588 588 current = tgt->dt_next;
589 589 while (current != NULL) {
590 590 next = current->dt_next;
591 591 if (current->dt_elf)
592 592 (void) elf_end(current->dt_elf);
593 593 if (current->dt_symtab)
594 594 free(current->dt_symtab);
595 595 free(current);
596 596 current = next;
597 597 }
598 598
599 599 if (tgt->dt_elf)
600 600 (void) elf_end(tgt->dt_elf);
601 601 if (tgt->dt_elf_root)
602 602 (void) elf_end(tgt->dt_elf_root);
603 603
604 604 if (tgt->dt_symtab)
605 605 free(tgt->dt_symtab);
606 606
607 607 free(tgt);
608 608 }
609 609
610 610 /*
611 611 * Given an address, return the section it is in and set the offset within
612 612 * the section.
613 613 */
614 614 const char *
615 615 dis_find_section(dis_tgt_t *tgt, uint64_t addr, off_t *offset)
616 616 {
617 617 int i;
618 618
619 619 for (i = 1; i < tgt->dt_shncount; i++) {
620 620 if ((addr >= tgt->dt_shnmap[i].dm_start) &&
621 621 (addr < tgt->dt_shnmap[i].dm_start +
622 622 tgt->dt_shnmap[i].dm_length)) {
623 623 *offset = addr - tgt->dt_shnmap[i].dm_start;
624 624 return (tgt->dt_shnmap[i].dm_name);
625 625 }
626 626 }
627 627
628 628 *offset = 0;
629 629 return (NULL);
630 630 }
631 631
632 632 /*
633 633 * Given an address, returns the name of the corresponding symbol, as well as
634 634 * the offset within that symbol. If no matching symbol is found, then NULL is
635 635 * returned.
636 636 *
637 637 * If 'cache_result' is specified, then we keep track of the resulting symbol.
638 638 * This cached result is consulted first on subsequent lookups in order to avoid
639 639 * unecessary lookups. This flag should be used for resolving the current PC,
640 640 * as the majority of addresses stay within the current function.
641 641 */
642 642 const char *
643 643 dis_tgt_lookup(dis_tgt_t *tgt, uint64_t addr, off_t *offset, int cache_result,
644 644 size_t *size, int *isfunc)
645 645 {
646 646 int lo, hi, mid;
647 647 sym_entry_t *sym, *osym, *match;
648 648 int found;
649 649
650 650 *offset = 0;
651 651 *size = 0;
652 652 if (isfunc != NULL)
653 653 *isfunc = 0;
654 654
655 655 if (tgt->dt_symcache != NULL &&
656 656 addr >= tgt->dt_symcache->se_sym.st_value &&
657 657 addr < tgt->dt_symcache->se_sym.st_value +
658 658 tgt->dt_symcache->se_sym.st_size) {
659 659 sym = tgt->dt_symcache;
660 660 *offset = addr - sym->se_sym.st_value;
661 661 *size = sym->se_sym.st_size;
662 662 if (isfunc != NULL)
663 663 *isfunc = (GELF_ST_TYPE(sym->se_sym.st_info) ==
664 664 STT_FUNC);
665 665 return (sym->se_name);
666 666 }
667 667
668 668 lo = 0;
669 669 hi = (tgt->dt_symcount - 1);
670 670 found = 0;
671 671 match = osym = NULL;
672 672 while (lo <= hi) {
673 673 mid = (lo + hi) / 2;
674 674
675 675 sym = &tgt->dt_symtab[mid];
676 676
677 677 if (addr >= sym->se_sym.st_value &&
678 678 addr < sym->se_sym.st_value + sym->se_sym.st_size &&
679 679 (!found || sym->se_sym.st_value > osym->se_sym.st_value)) {
680 680 osym = sym;
681 681 found = 1;
682 682 } else if (addr == sym->se_sym.st_value) {
683 683 /*
684 684 * Particularly for .plt objects, it's possible to have
685 685 * a zero sized object. We want to return this, but we
686 686 * want it to be a last resort.
687 687 */
688 688 match = sym;
689 689 }
690 690
691 691 if (addr < sym->se_sym.st_value)
692 692 hi = mid - 1;
693 693 else
694 694 lo = mid + 1;
695 695 }
696 696
697 697 if (!found) {
698 698 if (match)
699 699 osym = match;
700 700 else
701 701 return (NULL);
702 702 }
703 703
704 704 /*
705 705 * Walk backwards to find the best match.
706 706 */
707 707 do {
708 708 sym = osym;
709 709
710 710 if (osym == tgt->dt_symtab)
711 711 break;
712 712
713 713 osym = osym - 1;
714 714 } while ((sym->se_sym.st_value == osym->se_sym.st_value) &&
715 715 (addr >= osym->se_sym.st_value) &&
716 716 (addr < osym->se_sym.st_value + osym->se_sym.st_size));
717 717
718 718 if (cache_result)
↓ open down ↓ |
718 lines elided |
↑ open up ↑ |
719 719 tgt->dt_symcache = sym;
720 720
721 721 *offset = addr - sym->se_sym.st_value;
722 722 *size = sym->se_sym.st_size;
723 723 if (isfunc)
724 724 *isfunc = (GELF_ST_TYPE(sym->se_sym.st_info) == STT_FUNC);
725 725
726 726 return (sym->se_name);
727 727 }
728 728
729 -#if !defined(__sparc)
730 729 /*
731 730 * Given an address, return the starting offset of the next symbol in the file.
732 731 * Only needed on variable length instruction architectures.
733 732 */
734 733 off_t
735 734 dis_tgt_next_symbol(dis_tgt_t *tgt, uint64_t addr)
736 735 {
737 736 sym_entry_t *sym;
738 737
739 738 sym = (tgt->dt_symcache != NULL) ? tgt->dt_symcache : tgt->dt_symtab;
740 739
741 740 while (sym != (tgt->dt_symtab + tgt->dt_symcount)) {
742 741 if (sym->se_sym.st_value >= addr)
743 742 return (sym->se_sym.st_value - addr);
744 743 sym++;
745 744 }
746 745
747 746 return (0);
748 747 }
749 -#endif
750 748
751 749 /*
752 750 * Iterate over all sections in the target, executing the given callback for
753 751 * each.
754 752 */
755 753 void
756 754 dis_tgt_section_iter(dis_tgt_t *tgt, section_iter_f func, void *data)
757 755 {
758 756 dis_scn_t sdata;
759 757 Elf_Scn *scn;
760 758 int idx;
761 759
762 760 for (scn = elf_nextscn(tgt->dt_elf, NULL), idx = 1; scn != NULL;
763 761 scn = elf_nextscn(tgt->dt_elf, scn), idx++) {
764 762
765 763 if (gelf_getshdr(scn, &sdata.ds_shdr) == NULL) {
766 764 warn("%s: failed to get section %d header",
767 765 tgt->dt_filename, idx);
768 766 continue;
769 767 }
770 768
771 769 if ((sdata.ds_name = elf_strptr(tgt->dt_elf, tgt->dt_shstrndx,
772 770 sdata.ds_shdr.sh_name)) == NULL) {
773 771 warn("%s: failed to get section %d name",
774 772 tgt->dt_filename, idx);
775 773 continue;
776 774 }
777 775
778 776 if ((sdata.ds_data = elf_getdata(scn, NULL)) == NULL) {
779 777 warn("%s: failed to get data for section '%s'",
780 778 tgt->dt_filename, sdata.ds_name);
781 779 continue;
782 780 }
783 781
784 782 /*
785 783 * dis_tgt_section_iter is also used before the section map
786 784 * is initialized, so only check when we need to. If the
787 785 * section map is uninitialized, it will return 0 and have
788 786 * no net effect.
789 787 */
790 788 if (sdata.ds_shdr.sh_addr == 0)
791 789 sdata.ds_shdr.sh_addr = tgt->dt_shnmap[idx].dm_start;
792 790
793 791 func(tgt, &sdata, data);
794 792 }
795 793 }
796 794
797 795 /*
798 796 * Return 1 if the given section contains text, 0 otherwise.
799 797 */
800 798 int
801 799 dis_section_istext(dis_scn_t *scn)
802 800 {
803 801 return ((scn->ds_shdr.sh_type == SHT_PROGBITS) &&
804 802 (scn->ds_shdr.sh_flags == (SHF_ALLOC | SHF_EXECINSTR)));
805 803 }
806 804
807 805 /*
808 806 * Return a pointer to the section data.
809 807 */
810 808 void *
811 809 dis_section_data(dis_scn_t *scn)
812 810 {
813 811 return (scn->ds_data->d_buf);
814 812 }
815 813
816 814 /*
817 815 * Return the size of the section data.
818 816 */
819 817 size_t
820 818 dis_section_size(dis_scn_t *scn)
821 819 {
822 820 return (scn->ds_data->d_size);
823 821 }
824 822
825 823 /*
826 824 * Return the address for the given section.
827 825 */
828 826 uint64_t
829 827 dis_section_addr(dis_scn_t *scn)
830 828 {
831 829 return (scn->ds_shdr.sh_addr);
832 830 }
833 831
834 832 /*
835 833 * Return the name of the current section.
836 834 */
837 835 const char *
838 836 dis_section_name(dis_scn_t *scn)
839 837 {
840 838 return (scn->ds_name);
841 839 }
842 840
843 841 /*
844 842 * Create an allocated copy of the given section
845 843 */
846 844 dis_scn_t *
847 845 dis_section_copy(dis_scn_t *scn)
848 846 {
849 847 dis_scn_t *new;
850 848
851 849 new = safe_malloc(sizeof (dis_scn_t));
852 850 (void) memcpy(new, scn, sizeof (dis_scn_t));
853 851
854 852 return (new);
855 853 }
856 854
857 855 /*
858 856 * Free section memory
859 857 */
860 858 void
861 859 dis_section_free(dis_scn_t *scn)
862 860 {
863 861 free(scn);
864 862 }
865 863
866 864 /*
867 865 * Iterate over all functions in the target, executing the given callback for
868 866 * each one.
869 867 */
870 868 void
871 869 dis_tgt_function_iter(dis_tgt_t *tgt, function_iter_f func, void *data)
872 870 {
873 871 int i;
874 872 sym_entry_t *sym;
875 873 dis_func_t df;
876 874 Elf_Scn *scn;
877 875 GElf_Shdr shdr;
878 876
879 877 for (i = 0, sym = tgt->dt_symtab; i < tgt->dt_symcount; i++, sym++) {
880 878
881 879 /* ignore non-functions */
882 880 if ((GELF_ST_TYPE(sym->se_sym.st_info) != STT_FUNC) ||
883 881 (sym->se_name == NULL) ||
884 882 (sym->se_sym.st_size == 0) ||
885 883 (sym->se_shndx >= SHN_LORESERVE))
886 884 continue;
887 885
888 886 /* get the ELF data associated with this function */
889 887 if ((scn = elf_getscn(tgt->dt_elf, sym->se_shndx)) == NULL ||
890 888 gelf_getshdr(scn, &shdr) == NULL ||
891 889 (df.df_data = elf_getdata(scn, NULL)) == NULL ||
892 890 df.df_data->d_size == 0) {
893 891 warn("%s: failed to read section %d",
894 892 tgt->dt_filename, sym->se_shndx);
895 893 continue;
896 894 }
897 895
898 896 if (tgt->dt_shnmap[sym->se_shndx].dm_mapped)
899 897 shdr.sh_addr = tgt->dt_shnmap[sym->se_shndx].dm_start;
900 898
901 899 /*
902 900 * Verify that the address lies within the section that we think
903 901 * it does.
904 902 */
905 903 if (sym->se_sym.st_value < shdr.sh_addr ||
906 904 (sym->se_sym.st_value + sym->se_sym.st_size) >
907 905 (shdr.sh_addr + shdr.sh_size)) {
908 906 warn("%s: bad section %d for address %p",
909 907 tgt->dt_filename, sym->se_sym.st_shndx,
910 908 sym->se_sym.st_value);
911 909 continue;
912 910 }
913 911
914 912 df.df_sym = sym;
915 913 df.df_offset = sym->se_sym.st_value - shdr.sh_addr;
916 914
917 915 func(tgt, &df, data);
918 916 }
919 917 }
920 918
921 919 /*
922 920 * Return the data associated with a given function.
923 921 */
924 922 void *
925 923 dis_function_data(dis_func_t *func)
926 924 {
927 925 return ((char *)func->df_data->d_buf + func->df_offset);
928 926 }
929 927
930 928 /*
931 929 * Return the size of a function.
932 930 */
933 931 size_t
934 932 dis_function_size(dis_func_t *func)
935 933 {
936 934 return (func->df_sym->se_sym.st_size);
937 935 }
938 936
939 937 /*
940 938 * Return the address of a function.
941 939 */
942 940 uint64_t
943 941 dis_function_addr(dis_func_t *func)
944 942 {
945 943 return (func->df_sym->se_sym.st_value);
946 944 }
947 945
948 946 /*
949 947 * Return the name of the function
950 948 */
951 949 const char *
952 950 dis_function_name(dis_func_t *func)
953 951 {
954 952 return (func->df_sym->se_name);
955 953 }
956 954
957 955 /*
958 956 * Return a copy of a function.
959 957 */
960 958 dis_func_t *
961 959 dis_function_copy(dis_func_t *func)
962 960 {
963 961 dis_func_t *new;
964 962
965 963 new = safe_malloc(sizeof (dis_func_t));
966 964 (void) memcpy(new, func, sizeof (dis_func_t));
967 965
968 966 return (new);
969 967 }
970 968
971 969 /*
972 970 * Free function memory
973 971 */
974 972 void
975 973 dis_function_free(dis_func_t *func)
976 974 {
977 975 free(func);
978 976 }
↓ open down ↓ |
219 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX