Commit | Line | Data |
---|---|---|
d8f8adfe LC |
1 | The memory statistics code leads to segfaults during initialization (on |
2 | machines with InfiniPath networking): | |
3 | ||
4 | (gdb) bt full | |
5 | #0 ips_ptl_init (ep=0x1fc6af8, ptl=0x1fc6f88, ctl=0x1fc6d78) at ptl.c:224 | |
6 | err = PSM_OK | |
7 | num_of_send_bufs = 1024 | |
8 | num_of_send_desc = 4096 | |
9 | imm_size = 128 | |
10 | context = 0x1fc6b70 | |
11 | user_info = 0x1fc6b90 | |
12 | enable_shcontexts = 0 | |
13 | current_count = <optimized out> | |
14 | #1 0x00007fb2aa672abf in __psm_ep_open_internal ( | |
15 | unique_job_key=unique_job_key@entry=0x7ffed1ee5800 "<\207\020#5\271\267\200\354x\242e8\364zo", | |
16 | devid_enabled=devid_enabled@entry=0x7ffed1ee5724, opts_i=opts_i@entry=0x7ffed1ee5810, mq=<optimized out>, | |
17 | epo=epo@entry=0x7ffed1ee5710, epido=epido@entry=0x7ffed1ee5708) at psm_ep.c:929 | |
18 | ep = 0x1fc6af8 | |
19 | num_units = 1 | |
20 | len = <optimized out> | |
21 | err = <optimized out> | |
22 | epaddr = 0x1e9dd78 | |
23 | buf = "miriel044:2.0.", '\000' <repeats 113 times> | |
24 | p = <optimized out> | |
25 | e = <optimized out> | |
26 | old_cpuaff = 0x0 | |
27 | old_unit = 0x0 | |
28 | yield_cnt = {e_void = 0xfa, e_str = 0xfa <error: Cannot access memory at address 0xfa>, e_int = 250, | |
29 | e_uint = 250, e_long = 250, e_ulong = 250, e_ulonglong = 250} | |
30 | no_cpuaff = {e_void = 0x0, e_str = 0x0, e_int = 0, e_uint = 0, e_long = 0, e_ulong = 0, e_ulonglong = 0} | |
31 | env_unit_id = {e_void = 0xffffffffffffffff, | |
32 | e_str = 0xffffffffffffffff <error: Cannot access memory at address 0xffffffffffffffff>, e_int = -1, | |
33 | e_uint = 4294967295, e_long = -1, e_ulong = 18446744073709551615, e_ulonglong = 18446744073709551615} | |
34 | env_port_id = {e_void = 0x0, e_str = 0x0, e_int = 0, e_uint = 0, e_long = 0, e_ulong = 0, e_ulonglong = 0} | |
35 | env_sl = {e_void = 0x0, e_str = 0x0, e_int = 0, e_uint = 0, e_long = 0, e_ulong = 0, e_ulonglong = 0} | |
36 | ptl_sizes = <optimized out> | |
37 | default_cpuaff = <optimized out> | |
38 | opts = {timeout = 180000000000, unit = -1, affinity = 0, shm_mbytes = 10, sendbufs_num = 1024, | |
39 | network_pkey = 65535, port = 0, outsl = 0, service_id = 1152940698815692800, | |
40 | path_res_type = PSM_PATH_RES_NONE, senddesc_num = 4096, imm_size = 128} | |
41 | amsh_ptl = 0x1fc6e48 | |
42 | ips_ptl = 0x1fc6f88 | |
43 | self_ptl = 0x1fc99c8 | |
44 | i = 3 | |
45 | ||
46 | It looks like ptl.c:224 is writing past the region that was malloc'd. | |
47 | ||
48 | Turning memory stats off (defining psmi_stats_mask as 0) solves the problem. | |
49 | ||
50 | diff --git a/psm_utils.c b/psm_utils.c | |
51 | index c8651fe..5514921 100644 | |
52 | --- a/psm_utils.c | |
53 | +++ b/psm_utils.c | |
54 | @@ -1058,7 +1058,7 @@ psmi_log_memstats(psmi_memtype_t type, int64_t nbytes) | |
55 | return; | |
56 | } | |
57 | ||
58 | -#define psmi_stats_mask PSMI_STATSTYPE_MEMORY | |
59 | +#define psmi_stats_mask 0 | |
60 | ||
61 | #ifdef malloc | |
62 | #undef malloc |