/*
 * Copyright (c) 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009
 *    The President and Fellows of Harvard College.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <types.h>
#include <lib.h>
#include <spinlock.h>
#include <vm.h>

/*
 * Kernel malloc.
 */
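
/*
 * Fill a block of memory with the pattern 0xdeadbeef, one 32-bit word
 * at a time, so that uses of stale pointers into freed memory are
 * likely to fault or at least be noticed. Any trailing bytes beyond a
 * whole number of words are left untouched.
 */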
static
void
fill_deadbeef(void *vptr, size_t len)
{
        uint32_t *ptr = vptr;
        size_t i;

        for (i=0; i<len/sizeof(uint32_t); i++) {
                ptr[i] = 0xdeadbeef;
        }
}

////////////////////////////////////////////////////////////
//
// Pool-based subpage allocator.
//
// It works like this:
//
// We allocate one page at a time and fill it with objects of size k,
// for various k. Each page has its own freelist, maintained by a
// linked list in the first word of each object. Each page also has a
// freecount, so we know when the page is completely free and can
// release it.
//
// No assumptions are made about the sizes k; they need not be
// powers of two. Note, however, that malloc must always return
// pointers aligned to the maximum alignment requirements of the
// platform; thus block sizes must at least be multiples of 4,
// preferably 8. They must also be at least sizeof(struct
// freelist). It is only worth defining an additional block size if
// more blocks would fit on a page than with the existing block
// sizes, and large numbers of items of the new size are allocated.
//
// The free counts and addresses of the pages are maintained in
// another list. Maintaining this table is a nuisance, because it
// cannot recursively use the subpage allocator. (We could probably
// make that work, but it would be painful.)
//
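// A concrete example (illustrative only): with 4K pages and a block
// size of 128, a fresh page holds 4096/128 = 32 blocks. Its pageref
// starts with nfree = 32 and the freelist threaded through the first
// word of each block; once all 32 blocks have been freed again, the
// page itself is handed back to the page allocator.
//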

#undef SLOW     /* consistency checks */
#undef SLOWER   /* lots of consistency checks */

////////////////////////////////////////

#if PAGE_SIZE == 4096

#define NSIZES 8
static const size_t sizes[NSIZES] = { 16, 32, 64, 128, 256, 512, 1024, 2048 };

#define SMALLEST_SUBPAGE_SIZE 16
#define LARGEST_SUBPAGE_SIZE 2048

#elif PAGE_SIZE == 8192
#error "No support for 8k pages (yet?)"
#else
#error "Odd page size"
#endif

////////////////////////////////////////
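
/*
 * A freelist entry lives in the first word of each free block. A
 * pageref describes one page in use by the subpage allocator:
 *
 *    next_samesize           - chains pages of the same block size
 *    next_all                - chains all subpage-allocator pages
 *    pageaddr_and_blocktype  - page address with the sizes[] index
 *                              packed into the low bits
 *    freelist_offset         - offset of the first free block, or
 *                              INVALID_OFFSET when no blocks are free
 *    nfree                   - number of free blocks on the page
 */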
struct freelist {
        struct freelist *next;
};

struct pageref {
        struct pageref *next_samesize;
        struct pageref *next_all;
        vaddr_t pageaddr_and_blocktype;
        uint16_t freelist_offset;
        uint16_t nfree;
};

#define INVALID_OFFSET (0xffff)
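
/*
 * The blocktype index is small, so it fits in the page-offset bits
 * that PAGE_FRAME masks off.
 */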
#define PR_PAGEADDR(pr)  ((pr)->pageaddr_and_blocktype & PAGE_FRAME)
#define PR_BLOCKTYPE(pr) ((pr)->pageaddr_and_blocktype & ~PAGE_FRAME)
#define MKPAB(pa, blk)   (((pa)&PAGE_FRAME) | ((blk) & ~PAGE_FRAME))

////////////////////////////////////////

/*
 * This is cheesy.
 *
 * The problem is not that it's wasteful - we can only allocate whole
 * pages of pageref structures at a time anyway. The problem is that
 * we really ought to be able to have more than one of these pages.
 *
 * However, for the time being, one page worth of pagerefs gives us
 * 256 pagerefs; this lets us manage 256 * 4k = 1M of kernel heap.
 * That would be twice as much memory as we get for *everything*.
 * Thus, we will cheat and not allow any mechanism for having a second
 * page of pageref structs.
 *
 * Then, since the size is fixed at one page, we'll simplify a bit
 * further by allocating the page in the kernel BSS instead of calling
 * alloc_kpages to get it.
 */

#define NPAGEREFS (PAGE_SIZE / sizeof(struct pageref))
static struct pageref pagerefs[NPAGEREFS];

#define INUSE_WORDS (NPAGEREFS/32)
static uint32_t pagerefs_inuse[INUSE_WORDS];
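
/*
 * Allocate a pageref slot from the static table, using the
 * pagerefs_inuse bitmap (one bit per slot, 32 slots per word).
 * Returns NULL if every slot is taken.
 */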
static
struct pageref *
allocpageref(void)
{
        unsigned i,j;
        uint32_t k;

        for (i=0; i<INUSE_WORDS; i++) {
                if (pagerefs_inuse[i]==0xffffffff) {
                        /* full */
                        continue;
                }
                for (k=1,j=0; k!=0; k<<=1,j++) {
                        if ((pagerefs_inuse[i] & k)==0) {
                                pagerefs_inuse[i] |= k;
                                return &pagerefs[i*32 + j];
                        }
                }
                KASSERT(0);
        }

        /* ran out */
        return NULL;
}

static
void
freepageref(struct pageref *p)
{
        size_t i, j;
        uint32_t k;

        j = p-pagerefs;
        KASSERT(j < NPAGEREFS);  /* note: j is unsigned, don't test < 0 */

        i = j/32;
        k = ((uint32_t)1) << (j%32);

        KASSERT((pagerefs_inuse[i] & k) != 0);
        pagerefs_inuse[i] &= ~k;
}

////////////////////////////////////////
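
/*
 * List heads: sizebases[i] chains the pages currently carved into
 * blocks of size sizes[i]; allbase chains every subpage-allocator
 * page regardless of block size.
 */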
static struct pageref *sizebases[NSIZES];
static struct pageref *allbase;

////////////////////////////////////////

/*
 * Use one spinlock for the whole thing. Making parts of the kmalloc
 * logic per-cpu is worthwhile for scalability; however, for the time
 * being at least we won't, because it adds a lot of complexity and in
 * OS/161 performance and scalability aren't super-critical.
 */
static struct spinlock kmalloc_spinlock = SPINLOCK_INITIALIZER;

////////////////////////////////////////

/* SLOWER implies SLOW */
#ifdef SLOWER
#ifndef SLOW
#define SLOW
#endif
#endif
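
/*
 * With SLOW defined, checksubpage() walks one page's freelist and
 * asserts that every entry lies within the page, is aligned to the
 * page's block size, sits in the kernel's direct-mapped segment, and
 * that the number of entries matches nfree.
 */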
#ifdef SLOW
static
void
checksubpage(struct pageref *pr)
{
        vaddr_t prpage, fla;
        struct freelist *fl;
        int blktype;
        int nfree=0;

        KASSERT(spinlock_do_i_hold(&kmalloc_spinlock));

        if (pr->freelist_offset == INVALID_OFFSET) {
                KASSERT(pr->nfree==0);
                return;
        }

        prpage = PR_PAGEADDR(pr);
        blktype = PR_BLOCKTYPE(pr);

        KASSERT(pr->freelist_offset < PAGE_SIZE);
        KASSERT(pr->freelist_offset % sizes[blktype] == 0);

        fla = prpage + pr->freelist_offset;
        fl = (struct freelist *)fla;

        for (; fl != NULL; fl = fl->next) {
                fla = (vaddr_t)fl;
                KASSERT(fla >= prpage && fla < prpage + PAGE_SIZE);
                KASSERT((fla-prpage) % sizes[blktype] == 0);
                KASSERT(fla >= MIPS_KSEG0);
                KASSERT(fla < MIPS_KSEG1);
                nfree++;
        }
        KASSERT(nfree==pr->nfree);
}
#else
#define checksubpage(pr) ((void)(pr))
#endif
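
/*
 * With SLOWER defined, checksubpages() additionally walks both sets
 * of lists, checks each page, and asserts that the per-size lists and
 * the all-pages list contain the same number of pages.
 */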
#ifdef SLOWER
static
void
checksubpages(void)
{
        struct pageref *pr;
        int i;
        unsigned sc=0, ac=0;

        KASSERT(spinlock_do_i_hold(&kmalloc_spinlock));

        for (i=0; i<NSIZES; i++) {
                for (pr = sizebases[i]; pr != NULL; pr = pr->next_samesize) {
                        checksubpage(pr);
                        KASSERT(sc < NPAGEREFS);
                        sc++;
                }
        }

        for (pr = allbase; pr != NULL; pr = pr->next_all) {
                checksubpage(pr);
                KASSERT(ac < NPAGEREFS);
                ac++;
        }

        KASSERT(sc==ac);
}
#else
#define checksubpages()
#endif

////////////////////////////////////////
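
/*
 * Print one page's status for kheap_printstats: its address, block
 * size, free count, and a map of which blocks are free ('.') and
 * which are allocated ('*').
 */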
static
void
dumpsubpage(struct pageref *pr)
{
        vaddr_t prpage, fla;
        struct freelist *fl;
        int blktype;
        unsigned i, n, index;
        uint32_t freemap[PAGE_SIZE / (SMALLEST_SUBPAGE_SIZE*32)];

        checksubpage(pr);
        KASSERT(spinlock_do_i_hold(&kmalloc_spinlock));

        /* clear freemap[] */
        for (i=0; i<sizeof(freemap)/sizeof(freemap[0]); i++) {
                freemap[i] = 0;
        }

        prpage = PR_PAGEADDR(pr);
        blktype = PR_BLOCKTYPE(pr);

        /* compute how many bits we need in freemap and assert we fit */
        n = PAGE_SIZE / sizes[blktype];
        KASSERT(n <= 32*sizeof(freemap)/sizeof(freemap[0]));

        if (pr->freelist_offset != INVALID_OFFSET) {
                fla = prpage + pr->freelist_offset;
                fl = (struct freelist *)fla;

                for (; fl != NULL; fl = fl->next) {
                        fla = (vaddr_t)fl;
                        index = (fla-prpage) / sizes[blktype];
                        KASSERT(index<n);
                        freemap[index/32] |= (1<<(index%32));
                }
        }

        kprintf("at 0x%08lx: size %-4lu %u/%u free\n",
                (unsigned long)prpage, (unsigned long) sizes[blktype],
                (unsigned) pr->nfree, n);
        kprintf(" ");
        for (i=0; i<n; i++) {
                int val = (freemap[i/32] & (1<<(i%32)))!=0;
                kprintf("%c", val ? '.' : '*');
                if (i%64==63 && i<n-1) {
                        kprintf("\n ");
                }
        }
        kprintf("\n");
}
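
/*
 * Dump the state of every page managed by the subpage allocator,
 * holding the spinlock so the snapshot is consistent. Intended for
 * debugging.
 */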
void
kheap_printstats(void)
{
        struct pageref *pr;

        /* print the whole thing with interrupts off */
        spinlock_acquire(&kmalloc_spinlock);

        kprintf("Subpage allocator status:\n");

        for (pr = allbase; pr != NULL; pr = pr->next_all) {
                dumpsubpage(pr);
        }

        spinlock_release(&kmalloc_spinlock);
}

////////////////////////////////////////
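
/*
 * Unlink a pageref from both its same-size list and the all-pages
 * list, in preparation for releasing a completely free page.
 */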
static
void
remove_lists(struct pageref *pr, int blktype)
{
        struct pageref **guy;

        KASSERT(blktype>=0 && blktype<NSIZES);

        for (guy = &sizebases[blktype]; *guy; guy = &(*guy)->next_samesize) {
                checksubpage(*guy);
                if (*guy == pr) {
                        *guy = pr->next_samesize;
                        break;
                }
        }

        for (guy = &allbase; *guy; guy = &(*guy)->next_all) {
                checksubpage(*guy);
                if (*guy == pr) {
                        *guy = pr->next_all;
                        break;
                }
        }
}
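
/*
 * Map a requested size to the index of the smallest block size that
 * can hold it; panics if the request exceeds the largest subpage size.
 */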
static
inline
int blocktype(size_t sz)
{
        unsigned i;
        for (i=0; i<NSIZES; i++) {
                if (sz <= sizes[i]) {
                        return i;
                }
        }

        panic("Subpage allocator cannot handle allocation of size %lu\n",
              (unsigned long)sz);

        // keep compiler happy
        return 0;
}
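
/*
 * Allocate a block of size sz (rounded up to one of sizes[]) from a
 * page that has free blocks of that size, grabbing a fresh page from
 * alloc_kpages if none is available.
 */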
static
void *
subpage_kmalloc(size_t sz)
{
        unsigned blktype;       // index into sizes[] that we're using
        struct pageref *pr;     // pageref for page we're allocating from
        vaddr_t prpage;         // PR_PAGEADDR(pr)
        vaddr_t fla;            // free list entry address
        struct freelist *volatile fl;   // free list entry
        void *retptr;           // our result

        volatile int i;

        blktype = blocktype(sz);
        sz = sizes[blktype];

        spinlock_acquire(&kmalloc_spinlock);

        checksubpages();

        for (pr = sizebases[blktype]; pr != NULL; pr = pr->next_samesize) {

                /* check for corruption */
                KASSERT(PR_BLOCKTYPE(pr) == blktype);
                checksubpage(pr);

                if (pr->nfree > 0) {

                doalloc: /* comes here after getting a whole fresh page */

                        KASSERT(pr->freelist_offset < PAGE_SIZE);
                        prpage = PR_PAGEADDR(pr);
                        fla = prpage + pr->freelist_offset;
                        fl = (struct freelist *)fla;

                        retptr = fl;
                        fl = fl->next;
                        pr->nfree--;

                        if (fl != NULL) {
                                KASSERT(pr->nfree > 0);
                                fla = (vaddr_t)fl;
                                KASSERT(fla - prpage < PAGE_SIZE);
                                pr->freelist_offset = fla - prpage;
                        }
                        else {
                                KASSERT(pr->nfree == 0);
                                pr->freelist_offset = INVALID_OFFSET;
                        }

                        checksubpages();

                        spinlock_release(&kmalloc_spinlock);
                        return retptr;
                }
        }

        /*
         * No page of the right size available.
         * Make a new one.
         *
         * We release the spinlock while calling alloc_kpages. This
         * avoids deadlock if alloc_kpages needs to come back here.
         * Note that this means things can change behind our back...
         */

        spinlock_release(&kmalloc_spinlock);
        prpage = alloc_kpages(1);
        if (prpage==0) {
                /* Out of memory. */
                kprintf("kmalloc: Subpage allocator couldn't get a page\n");
                return NULL;
        }
        spinlock_acquire(&kmalloc_spinlock);

        pr = allocpageref();
        if (pr==NULL) {
                /* Couldn't allocate accounting space for the new page. */
                spinlock_release(&kmalloc_spinlock);
                free_kpages(prpage);
                kprintf("kmalloc: Subpage allocator couldn't get pageref\n");
                return NULL;
        }

        pr->pageaddr_and_blocktype = MKPAB(prpage, blktype);
        pr->nfree = PAGE_SIZE / sizes[blktype];

        /*
         * Note: fl is volatile because the MIPS toolchain we were
         * using in spring 2001 attempted to optimize this loop and
         * blew it. Making fl volatile inhibits the optimization.
         */

        fla = prpage;
        fl = (struct freelist *)fla;
        fl->next = NULL;
        for (i=1; i<pr->nfree; i++) {
                fl = (struct freelist *)(fla + i*sizes[blktype]);
                fl->next = (struct freelist *)(fla + (i-1)*sizes[blktype]);
                KASSERT(fl != fl->next);
        }
        fla = (vaddr_t) fl;
        pr->freelist_offset = fla - prpage;
        KASSERT(pr->freelist_offset == (pr->nfree-1)*sizes[blktype]);

        pr->next_samesize = sizebases[blktype];
        sizebases[blktype] = pr;

        pr->next_all = allbase;
        allbase = pr;

        /* This is kind of cheesy, but avoids duplicating the alloc code. */
        goto doalloc;
}
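
/*
 * Free a subpage-allocated block. Returns 0 on success, or -1 if ptr
 * does not lie on any page we manage (meaning it must have been a
 * whole-page allocation).
 */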
static
int
subpage_kfree(void *ptr)
{
        int blktype;            // index into sizes[] that we're using
        vaddr_t ptraddr;        // same as ptr
        struct pageref *pr;     // pageref for page we're freeing in
        vaddr_t prpage;         // PR_PAGEADDR(pr)
        vaddr_t fla;            // free list entry address
        struct freelist *fl;    // free list entry
        vaddr_t offset;         // offset into page

        ptraddr = (vaddr_t)ptr;

        spinlock_acquire(&kmalloc_spinlock);

        checksubpages();

        for (pr = allbase; pr; pr = pr->next_all) {
                prpage = PR_PAGEADDR(pr);
                blktype = PR_BLOCKTYPE(pr);

                /* check for corruption */
                KASSERT(blktype>=0 && blktype<NSIZES);
                checksubpage(pr);

                if (ptraddr >= prpage && ptraddr < prpage + PAGE_SIZE) {
                        break;
                }
        }

        if (pr==NULL) {
                /* Not on any of our pages - not a subpage allocation */
                spinlock_release(&kmalloc_spinlock);
                return -1;
        }

        offset = ptraddr - prpage;

        /* Check for proper positioning and alignment */
        if (offset >= PAGE_SIZE || offset % sizes[blktype] != 0) {
                panic("kfree: subpage free of invalid addr %p\n", ptr);
        }

        /*
         * Clear the block to 0xdeadbeef to make it easier to detect
         * uses of dangling pointers.
         */
        fill_deadbeef(ptr, sizes[blktype]);

        /*
         * We probably ought to check for free twice by seeing if the block
         * is already on the free list. But that's expensive, so we don't.
         */

        fla = prpage + offset;
        fl = (struct freelist *)fla;
        if (pr->freelist_offset == INVALID_OFFSET) {
                fl->next = NULL;
        } else {
                fl->next = (struct freelist *)(prpage + pr->freelist_offset);
        }
        pr->freelist_offset = offset;
        pr->nfree++;

        KASSERT(pr->nfree <= PAGE_SIZE / sizes[blktype]);
        if (pr->nfree == PAGE_SIZE / sizes[blktype]) {
                /* Whole page is free. */
                remove_lists(pr, blktype);
                freepageref(pr);
                /* Call free_kpages without kmalloc_spinlock. */
                spinlock_release(&kmalloc_spinlock);
                free_kpages(prpage);
        }
        else {
                spinlock_release(&kmalloc_spinlock);
        }

#ifdef SLOWER /* Don't get the lock unless checksubpages does something. */
        spinlock_acquire(&kmalloc_spinlock);
        checksubpages();
        spinlock_release(&kmalloc_spinlock);
#endif

        return 0;
}

//
////////////////////////////////////////////////////////////
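
/*
 * kmalloc: sizes of LARGEST_SUBPAGE_SIZE and up go straight to the
 * page allocator; everything smaller goes through the subpage
 * allocator above.
 */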
void *
kmalloc(size_t sz)
{
        if (sz>=LARGEST_SUBPAGE_SIZE) {
                unsigned long npages;
                vaddr_t address;

                /* Round up to a whole number of pages. */
                npages = (sz + PAGE_SIZE - 1)/PAGE_SIZE;
                address = alloc_kpages(npages);
                if (address==0) {
                        return NULL;
                }

                return (void *)address;
        }

        return subpage_kmalloc(sz);
}
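
/*
 * kfree: NULL is a no-op; otherwise try the subpage allocator first,
 * and if it doesn't own the pointer, assume it was a whole-page
 * allocation and hand it to free_kpages.
 */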
void
kfree(void *ptr)
{
        /*
         * Try subpage first; if that fails, assume it's a big allocation.
         */
        if (ptr == NULL) {
                return;
        } else if (subpage_kfree(ptr)) {
                KASSERT((vaddr_t)ptr%PAGE_SIZE==0);
                free_kpages((vaddr_t)ptr);
        }
}
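
/*
 * Minimal usage sketch (illustrative only; "struct example" is a
 * hypothetical caller-defined type, not part of this file):
 *
 *      struct example *e;
 *
 *      e = kmalloc(sizeof(struct example));
 *      if (e == NULL) {
 *              return ENOMEM;
 *      }
 *      ...
 *      kfree(e);
 *
 * Requests under LARGEST_SUBPAGE_SIZE come from the subpage pools;
 * larger ones are rounded up to whole pages.
 */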