Compare commits
574 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| ddc33821cf | |||
| 0ab7d02994 | |||
| a8c4ab221b | |||
| 87d36a7752 | |||
| 998ded414c | |||
| f78d031e64 | |||
| 4ab37dd34a | |||
| 8129dec2f7 | |||
| a1035a1878 | |||
| db169c5f90 | |||
| bbb55ef261 | |||
| 1130cafe41 | |||
| a1cf27e232 | |||
| 5a1ce99d87 | |||
| c7db296e1b | |||
| f634a750c5 | |||
| d07a196c8e | |||
| 8c56c75d2c | |||
| e54895efde | |||
| 2f8cca2d6d | |||
| 64607152ee | |||
| 20383ad3d0 | |||
| 787d34f650 | |||
| ae618a0c68 | |||
| f480376153 | |||
| e4b3a88fc6 | |||
| 69a5c53074 | |||
| 259583e936 | |||
| 0f826290d0 | |||
| e46f027894 | |||
| 3e093f6a40 | |||
| 00996b551f | |||
| 24d8697cef | |||
| be4f6741f9 | |||
| 7a2f67f5f0 | |||
| bba0425267 | |||
| beaf96b375 | |||
| f1af1ffb8f | |||
| 059fab2cc0 | |||
| f284a80656 | |||
| 5f973ab51e | |||
| 60b6713957 | |||
| ebcf9a0d6d | |||
| 942b7f8b78 | |||
| 0b0aa6c0e0 | |||
| 9705a80c82 | |||
| 99a02e2941 | |||
| b88d75720f | |||
| d2b677b6da | |||
| 083645f203 | |||
| 994b9a19ac | |||
| faa929e717 | |||
| 3ee3a9df6d | |||
| 73e1a4f1f9 | |||
| b068fde9cd | |||
| 167ea67dee | |||
| f33d85a27a | |||
| 1e8239d72a | |||
| a51a0a6f13 | |||
| cc3f6e1a4f | |||
| 5db6c311f4 | |||
| f4df713846 | |||
| 7176bb2a47 | |||
| a6bd98cc02 | |||
| 0f7462ae1c | |||
| 0d8d915d82 | |||
| 8f4f68b877 | |||
| 8c0a5a5e61 | |||
| ffd3f53785 | |||
| f39fa54c39 | |||
| 11125b0d68 | |||
| 3ae69d1290 | |||
| 2929fbb803 | |||
| f4db8b96de | |||
| 8eb3bf3559 | |||
| 326a4fcee4 | |||
| 9b82f1a52c | |||
| f3da381752 | |||
| 8aa589a40c | |||
| e03f377326 | |||
| 8d21846562 | |||
| 3e1367caa1 | |||
| 02536b7724 | |||
| e28725884f | |||
| c2b3fb7236 | |||
| 2f95f7cda8 | |||
| e551aa17ed | |||
| e6d4c160cd | |||
| 9390fe5d2c | |||
| 419f5e495b | |||
| 673deadf37 | |||
| 20ea65b38c | |||
| 84665ff699 | |||
| bfbc94dfb0 | |||
| f74dcfc2a1 | |||
| 7c562d0539 | |||
| b5e4459a34 | |||
| 782122b681 | |||
| d550bced78 | |||
| a7ee3f531b | |||
| b9439947a7 | |||
| 3b60a95f13 | |||
| 82ae6d7458 | |||
| 7ebc34ddcc | |||
| bd6a2c2311 | |||
| 5fd68eae54 | |||
| f5857cfc9e | |||
| 1ce1b17a85 | |||
| a2456c3ed2 | |||
| 01d2ea1605 | |||
| 15783f09a0 | |||
| 9efd568e07 | |||
| 1a207e19c2 | |||
| 73cf93727b | |||
| 4410e702d9 | |||
| f584e2ec25 | |||
| 3aa06444f4 | |||
| c897a56c34 | |||
| 5e9957da0f | |||
| 6ff2d4abe7 | |||
| e4239f1885 | |||
| fbbaaf5b54 | |||
| 3fa3920bb3 | |||
| 45e51fcc07 | |||
| 0884e3d543 | |||
| e3c7c9b890 | |||
| f4155cc9e8 | |||
| a01ae91051 | |||
| daca522d25 | |||
| ec521feb15 | |||
| d7bc947a02 | |||
| fb84d4ef11 | |||
| 5fbeee953a | |||
| 4cefb4333f | |||
| 689da07ac6 | |||
| 76981bcc18 | |||
| 6aae35cb3d | |||
| dac6f2883e | |||
| c484f766fa | |||
| 57690479bd | |||
| d0539a9cac | |||
| 4c8f583c0c | |||
| 6118faffa9 | |||
| dad6470c60 | |||
| 46c37fc8f3 | |||
| f6908f21a8 | |||
| 01d9d9a5ba | |||
| c43d993a4d | |||
| 7d9bbecd7a | |||
| d135731398 | |||
| 5c190beb04 | |||
| fc66556f9f | |||
| 648bacc90f | |||
| dd37443fc7 | |||
| e34322702a | |||
| e12997e6a9 | |||
| fabaa806d3 | |||
| a83ad620c8 | |||
| d90900b6e6 | |||
| 6d9a88e9f4 | |||
| d0ee60f9e3 | |||
| 14ec92518e | |||
| 435e2bdeb4 | |||
| f06d8041e3 | |||
| 9b35eaca42 | |||
| 130b1f4327 | |||
| 921280f85c | |||
| d4a0b32f06 | |||
| b3bec32e99 | |||
| 2048980820 | |||
| 176f6d23a9 | |||
| 328175547f | |||
| e2e0fad849 | |||
| 397bf3f4a6 | |||
| aa77228453 | |||
| 82cb8f95ed | |||
| 3f2b4e7282 | |||
| d6784bb4a5 | |||
| 1bb948f43b | |||
| 2a1823d52c | |||
| 89943dc5ba | |||
| fceb02a44a | |||
| 7298d8e179 | |||
| 6f32544dde | |||
| 10d248b3cc | |||
| fb32120659 | |||
| 73de203c16 | |||
| 41bb2ab5e6 | |||
| a587c8f5e5 | |||
| 0c53a5ca35 | |||
| c760a01a79 | |||
| a2c29e8abf | |||
| 18add6a9bd | |||
| a083e6c2bf | |||
| a2548f5421 | |||
| 6790126a23 | |||
| 1195549f41 | |||
| b0096a2740 | |||
| a11479eba8 | |||
| 12eaea401e | |||
| 31595b7409 | |||
| 4a0682bbc1 | |||
| 932a287437 | |||
| 670741ae40 | |||
| 70b27e06ff | |||
| 4c38ddb623 | |||
| 6f00ddced6 | |||
| c0eecd63c9 | |||
| 1fd0b03e78 | |||
| 6c59de9300 | |||
| b1309a5d53 | |||
| 489cd6d1a2 | |||
| c9cc4330c8 | |||
| 604f846cd2 | |||
| e939cf6862 | |||
| 72f2e5ebe0 | |||
| bd7dddd415 | |||
| fbd9dc878b | |||
| d6c51ff997 | |||
| 86ac51157c | |||
| b73fa2b972 | |||
| 798f69bceb | |||
| e8be52a1ff | |||
| 8b5b075f4c | |||
| b214fc278a | |||
| b3ae7f46bd | |||
| 48167d3223 | |||
| d65135c040 | |||
| 1761acc4c3 | |||
| d4d93df032 | |||
| 261bddb999 | |||
| 1a3bc851af | |||
| 15f572ef9c | |||
| 81690c5b5a | |||
| 832c0f9afd | |||
| f92cac7751 | |||
| e74eb1dd51 | |||
| 8f7b9072ea | |||
| 4595aa3079 | |||
| 807d294ac4 | |||
| c947dd0d49 | |||
| d192e6c0fe | |||
| 7dbbcb362f | |||
| 593cf98015 | |||
| 8dd9f5ef3f | |||
| 0eaf058a4f | |||
| 1aac2c8e23 | |||
| 70e8dd7979 | |||
| eb0700359b | |||
| 3f16a9443e | |||
| bf0cf0a346 | |||
| 14b868907b | |||
| dbc778e4fa | |||
| 7fac03d4de | |||
| 26c0180374 | |||
| 8ebb3a4231 | |||
| f1f1ba9c8c | |||
| 6ce00b5f0f | |||
| 4ec0e02a89 | |||
| 8f9192ac36 | |||
| 80ce123ab6 | |||
| 1dc8513cd3 | |||
| b0054643c0 | |||
| 972ff73ecf | |||
| 1f8a859b47 | |||
| 2601d8a36f | |||
| a713c2fcaa | |||
| c4c5e435cc | |||
| 853b56c784 | |||
| 863a5c5e5f | |||
| ebce1cb031 | |||
| fff7744907 | |||
| 27c3ed7e96 | |||
| e2b28da32f | |||
| 2c50b716fd | |||
| 307b2b8da5 | |||
| eba2be8a35 | |||
| a997af71be | |||
| e7c37b8000 | |||
| 8c40f94aa8 | |||
| da13bd408a | |||
| c328d26b8d | |||
| 6cda6792a9 | |||
| 2d3fda1d0b | |||
| 5d43c135db | |||
| a866192db7 | |||
| c0cc6ac6db | |||
| 14c5bc08c2 | |||
| 7f01d273d0 | |||
| 137e0a799c | |||
| f214ff1b57 | |||
| 0ce698eb1f | |||
| e601248bdc | |||
| d8eeab9b89 | |||
| fdf031ac16 | |||
| 1ffe740153 | |||
| 72968d613e | |||
| 2e98f875c3 | |||
| a6cb9a6b93 | |||
| da0a91b9f7 | |||
| f093786bec | |||
| 368f155328 | |||
| 425f920013 | |||
| dbddf37579 | |||
| fa7a5ccd11 | |||
| 172bf0a389 | |||
| 9bafd166e3 | |||
| 2e31b8abd1 | |||
| a42ee00101 | |||
| f6935b0869 | |||
| 03a7763a5e | |||
| 3a2f7b0106 | |||
| 2819ec2197 | |||
| f7d81a9281 | |||
| 914faf042d | |||
| 75c6a94839 | |||
| f7b5b48266 | |||
| f9bd83c726 | |||
| edc275ce4f | |||
| d00ea61d1a | |||
| 01117e92c9 | |||
| d477096cb0 | |||
| f44ddfa3b3 | |||
| e0acd254b1 | |||
| d0507f7e9f | |||
| 0f8b2aba22 | |||
| 7e5c7445e2 | |||
| a055fb525d | |||
| 8cb72df663 | |||
| e805249651 | |||
| 06a7889e1f | |||
| 20deed09f0 | |||
| bb81f84709 | |||
| 5c1dad1660 | |||
| 7f2220b8e9 | |||
| 65dda3f24e | |||
| 544971d665 | |||
| dbddab4356 | |||
| 12eb8a9bb0 | |||
| 828a3ea57a | |||
| eb6de9d1de | |||
| 42c8ef6539 | |||
| 780d4fc29b | |||
| 94fcc5bb9a | |||
| e822fc47dd | |||
| 26492a2895 | |||
| 1a5ff7f535 | |||
| 4c181d7fc0 | |||
| be78eb752e | |||
| 0ad7c8ac50 | |||
| e9458a6cd3 | |||
| 9e3b0b5866 | |||
| 0eaa27291a | |||
| 0b07dd1b79 | |||
| c25f8c7a39 | |||
| 9e53ae20d4 | |||
| 09c9ee58d1 | |||
| 153a59a6f4 | |||
| cad72a8562 | |||
| 343bfbd30a | |||
| 4e4f1208f7 | |||
| a325a78866 | |||
| 6ae99454da | |||
| 04e193de13 | |||
| 2ca46fabfd | |||
| 5b737b499d | |||
| cb4f3a4d65 | |||
| 51789fcd38 | |||
| 9f50c5dc3a | |||
| cd905f7ad1 | |||
| 79266f6b97 | |||
| a666b69c2c | |||
| 47e8552eba | |||
| 8dd9175411 | |||
| f08e0c0054 | |||
| d862f345be | |||
| a14768c49a | |||
| 56e57775e7 | |||
| b3b752ba41 | |||
| 7b32f2f73b | |||
| ea5a1a8693 | |||
| 92f8fb2b2b | |||
| a3e440414d | |||
| 10ba03ccea | |||
| ccb7c30a05 | |||
| 7dfeb8e7ce | |||
| b1b706453f | |||
| bd5708286d | |||
| c8a13cf213 | |||
| 5ad0a03d18 | |||
| 3819eec03f | |||
| 40b8587a8a | |||
| e7b1115572 | |||
| e1a01803d0 | |||
| 69f4b0e1ad | |||
| 0909a5bed5 | |||
| 9dd224385e | |||
| 4176c59fd3 | |||
| afeee5432f | |||
| 9ae5bcf46e | |||
| b8f166e608 | |||
| c85a9b99e1 | |||
| 7c816a6b73 | |||
| 5a0cd3f53f | |||
| 9fa62adfe7 | |||
| f0ab8ec89a | |||
| f4cc82578d | |||
| 9ba40dc0ff | |||
| 8d6c97ea5c | |||
| 386f59000a | |||
| 215cd370a1 | |||
| 0a0e2c04a0 | |||
| aa191b87d3 | |||
| d5c243571f | |||
| 328e69a335 | |||
| b77755d0f7 | |||
| d7bae14707 | |||
| 4e58d08f5c | |||
| 9b1e691588 | |||
| 3988b0fc61 | |||
| 54eb345847 | |||
| bbe7aef95b | |||
| 1ff4cf68c2 | |||
| 1bc84d3feb | |||
| f7d78c8b7d | |||
| 7647c99cc2 | |||
| 43a774fbfc | |||
| a029bcac37 | |||
| bd913c503b | |||
| e838affde8 | |||
| 59ee251e1c | |||
| fa79db3bcc | |||
| b7c5cba361 | |||
| 382614ddae | |||
| aa959c6b34 | |||
| aabc3d386d | |||
| 4ebe778ede | |||
| fbb776e4fb | |||
| 41b85281a4 | |||
| 5532e3c663 | |||
| 2af2b1205f | |||
| 7d5a68be1b | |||
| f4162dff52 | |||
| a0d909af75 | |||
| 63669b7f71 | |||
| 4946964ed0 | |||
| 5f19842a6a | |||
| 9271d5346d | |||
| 7bba05cfa4 | |||
| c2a1f933e8 | |||
| 055769254d | |||
| 786ae83380 | |||
| 8c662c83be | |||
| 4698bc40c2 | |||
| f5d935b703 | |||
| d53865ac5f | |||
| 8934eb91a4 | |||
| ed6d94a358 | |||
| fa923da0e3 | |||
| 1f8265efbc | |||
| b553de7435 | |||
| 6a82412d64 | |||
| fa29c34995 | |||
| f84b5acf79 | |||
| 8b24f60861 | |||
| f82bb284bb | |||
| bf12a5c45e | |||
| ea5681232e | |||
| e6011be1af | |||
| 9946ccd6b1 | |||
| daec7de828 | |||
| 9ad48083aa | |||
| 2eac58aab3 | |||
| 22d8d169b6 | |||
| 8db54c2637 | |||
| 063fa963c3 | |||
| a6488adcc1 | |||
| 2239a6b09b | |||
| 8c179d506a | |||
| 377341ce5f | |||
| 8caeba7cba | |||
| 1d2f5d9893 | |||
| e4f47df3c3 | |||
| 4751055ee4 | |||
| 305ebfed0e | |||
| b66b950129 | |||
| 4aa8ba2eef | |||
| fab2c2aa97 | |||
| 026164eda4 | |||
| e91d1e5b7b | |||
| 73743eeeb0 | |||
| c1c1fd578a | |||
| f35cc66d18 | |||
| d9cf1d49b1 | |||
| 3d426ada01 | |||
| 0307f6a6cc | |||
| 0dee04f16b | |||
| 0e98e87b95 | |||
| d35e60c1a3 | |||
| 037e17c4ed | |||
| 2baf274dac | |||
| 3b04043f2a | |||
| c0edb6fe6f | |||
| bb137bc9bb | |||
| 16af976a71 | |||
| 6485578a7f | |||
| d2d0fc6721 | |||
| 9574a28a5f | |||
| dbe4ec3247 | |||
| 99debc548f | |||
| fa15f6b106 | |||
| 8568a73f33 | |||
| 8b57b2ee57 | |||
| 9a36e7b84a | |||
| d998691425 | |||
| 8cdf70c500 | |||
| 0e0bc548f6 | |||
| d21ae28843 | |||
| a4a806bef7 | |||
| d30d8fe71c | |||
| a5bdd41c3d | |||
| 5f5ab34559 | |||
| b26fa4e87c | |||
| bd5f43b119 | |||
| f97f8dbab3 | |||
| e30946f1f0 | |||
| c3ade864d9 | |||
| 9c35935671 | |||
| ed33ee65b2 | |||
| d04b5a09bd | |||
| 08cc31f9bf | |||
| cf2166f830 | |||
| 765de119dc | |||
| d46110b4d9 | |||
| 74f0aec478 | |||
| e3eb7e68bc | |||
| 912b8a886c | |||
| e25d35a191 | |||
| a9aad67541 | |||
| cd6e663f48 | |||
| 5f095b3952 | |||
| 811a275176 | |||
| b388f59ebd | |||
| ff47261337 | |||
| a91bf9a13d | |||
| fcfa94cea1 | |||
| 55f7ee1526 | |||
| b1b6fab7b8 | |||
| 391886a6f1 | |||
| c810afe224 | |||
| 5566ed1a63 | |||
| f0f91d2246 | |||
| 0942bf0ce0 | |||
| 9c94e90007 | |||
| a6ac906105 | |||
| d4ba4dc8b3 | |||
| 815d907ca4 | |||
| 3c24315f91 | |||
| 25f108bf78 | |||
| cc9d30efbf | |||
| af83f1be64 | |||
| b2cab453f1 | |||
| 8909597499 | |||
| 86f2a9067b | |||
| a5889fb5df | |||
| f1a86cfbd3 | |||
| c1cf630a94 | |||
| 8f30e16976 | |||
| 58e2e0a246 | |||
| ea02628f2b | |||
| 89acf5c5d6 | |||
| ac8e2a0c40 | |||
| ab7aa3354f | |||
| c4e0b84792 |
43
Makefile.in
43
Makefile.in
@ -1,12 +1,14 @@
|
||||
TARGET = @TARGET@
|
||||
SBINDIR = @SBINDIR@
|
||||
ETCDIR = @ETCDIR@
|
||||
MANDIR = @MANDIR@
|
||||
|
||||
all::
|
||||
@(cd executer/kernel; make modules)
|
||||
@(cd executer/kernel/mcctrl; make modules)
|
||||
@(cd executer/kernel/mcoverlayfs; make modules)
|
||||
@(cd executer/user; make)
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic | builtin-x86 | builtin-mic) \
|
||||
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
|
||||
(cd kernel; make) \
|
||||
;; \
|
||||
*) \
|
||||
@ -16,10 +18,11 @@ all::
|
||||
esac
|
||||
|
||||
install::
|
||||
@(cd executer/kernel; make install)
|
||||
@(cd executer/kernel/mcctrl; make install)
|
||||
@(cd executer/kernel/mcoverlayfs; make install)
|
||||
@(cd executer/user; make install)
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic | builtin-x86 | builtin-mic) \
|
||||
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
|
||||
(cd kernel; make install) \
|
||||
;; \
|
||||
*) \
|
||||
@ -27,19 +30,43 @@ install::
|
||||
exit 1 \
|
||||
;; \
|
||||
esac
|
||||
if [ "$(TARGET)" = attached-mic ]; then \
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic) \
|
||||
mkdir -p -m 755 $(SBINDIR); \
|
||||
install -m 755 arch/x86/tools/mcreboot-attached-mic.sh $(SBINDIR)/mcreboot; \
|
||||
install -m 755 arch/x86/tools/mcshutdown-attached-mic.sh $(SBINDIR)/mcshutdown; \
|
||||
mkdir -p -m 755 $(MANDIR)/man1; \
|
||||
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
fi
|
||||
;; \
|
||||
builtin-x86) \
|
||||
mkdir -p -m 755 $(SBINDIR); \
|
||||
install -m 755 arch/x86/tools/mcreboot-builtin-x86.sh $(SBINDIR)/mcreboot; \
|
||||
install -m 755 arch/x86/tools/mcshutdown-builtin-x86.sh $(SBINDIR)/mcshutdown; \
|
||||
mkdir -p -m 755 $(MANDIR)/man1; \
|
||||
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
;; \
|
||||
smp-x86) \
|
||||
mkdir -p -m 755 $(SBINDIR); \
|
||||
install -m 755 arch/x86/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot.sh; \
|
||||
install -m 755 arch/x86/tools/mcstop+release-smp-x86.sh $(SBINDIR)/mcstop+release.sh; \
|
||||
mkdir -p -m 755 $(ETCDIR); \
|
||||
install -m 644 arch/x86/tools/irqbalance_mck.service $(ETCDIR)/irqbalance_mck.service; \
|
||||
install -m 644 arch/x86/tools/irqbalance_mck.in $(ETCDIR)/irqbalance_mck.in; \
|
||||
mkdir -p -m 755 $(MANDIR)/man1; \
|
||||
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
;; \
|
||||
*) \
|
||||
echo "unknown target $(TARGET)" >&2 \
|
||||
exit 1 \
|
||||
;; \
|
||||
esac
|
||||
|
||||
clean::
|
||||
@(cd executer/kernel; make clean)
|
||||
@(cd executer/kernel/mcctrl; make clean)
|
||||
@(cd executer/kernel/mcoverlayfs; make clean)
|
||||
@(cd executer/user; make clean)
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic | builtin-x86 | builtin-mic) \
|
||||
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
|
||||
(cd kernel; make clean) \
|
||||
;; \
|
||||
*) \
|
||||
|
||||
2
arch/x86/kernel/Makefile.arch
Normal file
2
arch/x86/kernel/Makefile.arch
Normal file
@ -0,0 +1,2 @@
|
||||
IHK_OBJS += cpu.o interrupt.o memory.o trampoline.o local.o context.o
|
||||
IHK_OBJS += perfctr.o syscall.o vsyscall.o
|
||||
@ -10,7 +10,7 @@
|
||||
* HISTORY
|
||||
*/
|
||||
|
||||
#define X86_CPU_LOCAL_OFFSET_TSS 128
|
||||
#define X86_CPU_LOCAL_OFFSET_TSS 176
|
||||
#define X86_TSS_OFFSET_SP0 4
|
||||
#define X86_CPU_LOCAL_OFFSET_SP0 \
|
||||
(X86_CPU_LOCAL_OFFSET_TSS + X86_TSS_OFFSET_SP0)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -15,7 +15,7 @@
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#else
|
||||
#define dkprintf(...)
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
@ -78,15 +78,16 @@ int get_prstatus_size(void)
|
||||
* \brief Fill a prstatus structure.
|
||||
*
|
||||
* \param head A pointer to a note structure.
|
||||
* \param proc A pointer to the current process structure.
|
||||
* \param thread A pointer to the current thread structure.
|
||||
* \param regs0 A pointer to a x86_regs structure.
|
||||
*/
|
||||
|
||||
void fill_prstatus(struct note *head, struct process *proc, void *regs0)
|
||||
void fill_prstatus(struct note *head, struct thread *thread, void *regs0)
|
||||
{
|
||||
void *name;
|
||||
struct elf_prstatus64 *prstatus;
|
||||
struct x86_regs *regs = regs0;
|
||||
struct x86_user_context *uctx = regs0;
|
||||
struct x86_basic_regs *regs = &uctx->gpr;
|
||||
register unsigned long _r12 asm("r12");
|
||||
register unsigned long _r13 asm("r13");
|
||||
register unsigned long _r14 asm("r14");
|
||||
@ -159,11 +160,11 @@ int get_prpsinfo_size(void)
|
||||
* \brief Fill a prpsinfo structure.
|
||||
*
|
||||
* \param head A pointer to a note structure.
|
||||
* \param proc A pointer to the current process structure.
|
||||
* \param thread A pointer to the current thread structure.
|
||||
* \param regs A pointer to a x86_regs structure.
|
||||
*/
|
||||
|
||||
void fill_prpsinfo(struct note *head, struct process *proc, void *regs)
|
||||
void fill_prpsinfo(struct note *head, struct thread *thread, void *regs)
|
||||
{
|
||||
void *name;
|
||||
struct elf_prpsinfo64 *prpsinfo;
|
||||
@ -175,8 +176,8 @@ void fill_prpsinfo(struct note *head, struct process *proc, void *regs)
|
||||
memcpy(name, "CORE", sizeof("CORE"));
|
||||
prpsinfo = (struct elf_prpsinfo64 *)(name + align32(sizeof("CORE")));
|
||||
|
||||
prpsinfo->pr_state = proc->ftn->status;
|
||||
prpsinfo->pr_pid = proc->ftn->pid;
|
||||
prpsinfo->pr_state = thread->status;
|
||||
prpsinfo->pr_pid = thread->proc->pid;
|
||||
|
||||
/*
|
||||
We leave most of the fields unfilled.
|
||||
@ -209,11 +210,11 @@ int get_auxv_size(void)
|
||||
* \brief Fill an AUXV structure.
|
||||
*
|
||||
* \param head A pointer to a note structure.
|
||||
* \param proc A pointer to the current process structure.
|
||||
* \param thread A pointer to the current thread structure.
|
||||
* \param regs A pointer to a x86_regs structure.
|
||||
*/
|
||||
|
||||
void fill_auxv(struct note *head, struct process *proc, void *regs)
|
||||
void fill_auxv(struct note *head, struct thread *thread, void *regs)
|
||||
{
|
||||
void *name;
|
||||
void *auxv;
|
||||
@ -224,7 +225,7 @@ void fill_auxv(struct note *head, struct process *proc, void *regs)
|
||||
name = (void *) (head + 1);
|
||||
memcpy(name, "CORE", sizeof("CORE"));
|
||||
auxv = name + align32(sizeof("CORE"));
|
||||
memcpy(auxv, proc->saved_auxv, sizeof(unsigned long) * AUXV_LEN);
|
||||
memcpy(auxv, thread->proc->saved_auxv, sizeof(unsigned long) * AUXV_LEN);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -242,23 +243,23 @@ int get_note_size(void)
|
||||
* \brief Fill the NOTE segment.
|
||||
*
|
||||
* \param head A pointer to a note structure.
|
||||
* \param proc A pointer to the current process structure.
|
||||
* \param thread A pointer to the current thread structure.
|
||||
* \param regs A pointer to a x86_regs structure.
|
||||
*/
|
||||
|
||||
void fill_note(void *note, struct process *proc, void *regs)
|
||||
void fill_note(void *note, struct thread *thread, void *regs)
|
||||
{
|
||||
fill_prstatus(note, proc, regs);
|
||||
fill_prstatus(note, thread, regs);
|
||||
note += get_prstatus_size();
|
||||
fill_prpsinfo(note, proc, regs);
|
||||
fill_prpsinfo(note, thread, regs);
|
||||
note += get_prpsinfo_size();
|
||||
fill_auxv(note, proc, regs);
|
||||
fill_auxv(note, thread, regs);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Generate an image of the core file.
|
||||
*
|
||||
* \param proc A pointer to the current process structure.
|
||||
* \param thread A pointer to the current thread structure.
|
||||
* \param regs A pointer to a x86_regs structure.
|
||||
* \param coretable(out) An array of core chunks.
|
||||
* \param chunks(out) Number of the entires of coretable.
|
||||
@ -270,7 +271,18 @@ void fill_note(void *note, struct process *proc, void *regs)
|
||||
* should be zero.
|
||||
*/
|
||||
|
||||
int gencore(struct process *proc, void *regs,
|
||||
/*@
|
||||
@ requires \valid(thread);
|
||||
@ requires \valid(regs);
|
||||
@ requires \valid(coretable);
|
||||
@ requires \valid(chunks);
|
||||
@ behavior success:
|
||||
@ ensures \result == 0;
|
||||
@ assigns coretable;
|
||||
@ behavior failure:
|
||||
@ ensures \result == -1;
|
||||
@*/
|
||||
int gencore(struct thread *thread, void *regs,
|
||||
struct coretable **coretable, int *chunks)
|
||||
{
|
||||
struct coretable *ct = NULL;
|
||||
@ -278,7 +290,7 @@ int gencore(struct process *proc, void *regs,
|
||||
Elf64_Phdr *ph = NULL;
|
||||
void *note = NULL;
|
||||
struct vm_range *range;
|
||||
struct process_vm *vm = proc->vm;
|
||||
struct process_vm *vm = thread->vm;
|
||||
int segs = 1; /* the first one is for NOTE */
|
||||
int notesize, phsize, alignednotesize;
|
||||
unsigned int offset = 0;
|
||||
@ -305,7 +317,7 @@ int gencore(struct process *proc, void *regs,
|
||||
unsigned long p, phys;
|
||||
int prevzero = 0;
|
||||
for (p = range->start; p < range->end; p += PAGE_SIZE) {
|
||||
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table,
|
||||
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
|
||||
(void *)p, &phys) != 0) {
|
||||
prevzero = 1;
|
||||
} else {
|
||||
@ -325,7 +337,7 @@ int gencore(struct process *proc, void *regs,
|
||||
dkprintf("we have %d segs and %d chunks.\n\n", segs, *chunks);
|
||||
|
||||
{
|
||||
struct vm_regions region = proc->vm->region;
|
||||
struct vm_regions region = thread->vm->region;
|
||||
|
||||
dkprintf("text: %lx-%lx\n", region.text_start, region.text_end);
|
||||
dkprintf("data: %lx-%lx\n", region.data_start, region.data_end);
|
||||
@ -363,7 +375,7 @@ int gencore(struct process *proc, void *regs,
|
||||
goto fail;
|
||||
}
|
||||
memset(note, 0, alignednotesize);
|
||||
fill_note(note, proc, regs);
|
||||
fill_note(note, thread, regs);
|
||||
|
||||
/* prgram header for NOTE segment is exceptional */
|
||||
ph[0].p_type = PT_NOTE;
|
||||
@ -433,7 +445,7 @@ int gencore(struct process *proc, void *regs,
|
||||
|
||||
for (start = p = range->start;
|
||||
p < range->end; p += PAGE_SIZE) {
|
||||
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table,
|
||||
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
|
||||
(void *)p, &phys) != 0) {
|
||||
if (prevzero == 0) {
|
||||
/* We begin a new chunk */
|
||||
@ -471,9 +483,9 @@ int gencore(struct process *proc, void *regs,
|
||||
i++;
|
||||
}
|
||||
} else {
|
||||
if ((proc->vm->region.user_start <= range->start) &&
|
||||
(range->end <= proc->vm->region.user_end)) {
|
||||
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table,
|
||||
if ((thread->vm->region.user_start <= range->start) &&
|
||||
(range->end <= thread->vm->region.user_end)) {
|
||||
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
|
||||
(void *)range->start, &phys) != 0) {
|
||||
dkprintf("could not convert user virtual address %lx"
|
||||
"to physical address", range->start);
|
||||
@ -509,6 +521,10 @@ int gencore(struct process *proc, void *regs,
|
||||
* \param coretable An array of core chunks.
|
||||
*/
|
||||
|
||||
/*@
|
||||
@ requires \valid(coretable);
|
||||
@ assigns \nothing;
|
||||
@*/
|
||||
void freecore(struct coretable **coretable)
|
||||
{
|
||||
struct coretable *ct = *coretable;
|
||||
|
||||
98
arch/x86/kernel/include/arch-bitops.h
Normal file
98
arch/x86/kernel/include/arch-bitops.h
Normal file
@ -0,0 +1,98 @@
|
||||
/**
|
||||
* \file arch-bitops.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* Find last set bit in word.
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
*/
|
||||
|
||||
#ifndef HEADER_X86_COMMON_ARCH_BITOPS_H
|
||||
#define HEADER_X86_COMMON_ARCH_BITOPS_H
|
||||
|
||||
#define ARCH_HAS_FAST_MULTIPLIER 1
|
||||
|
||||
static inline int fls(int x)
|
||||
{
|
||||
int r;
|
||||
asm("bsrl %1,%0\n\t"
|
||||
"jnz 1f\n\t"
|
||||
"movl $-1,%0\n"
|
||||
"1:" : "=r" (r) : "rm" (x));
|
||||
|
||||
return r + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* ffs - find first set bit in word
|
||||
* @x: the word to search
|
||||
*
|
||||
* This is defined the same way as the libc and compiler builtin ffs
|
||||
* routines, therefore differs in spirit from the other bitops.
|
||||
*
|
||||
* ffs(value) returns 0 if value is 0 or the position of the first
|
||||
* set bit if value is nonzero. The first (least significant) bit
|
||||
* is at position 1.
|
||||
*/
|
||||
static inline int ffs(int x)
|
||||
{
|
||||
int r;
|
||||
asm("bsfl %1,%0\n\t"
|
||||
"jnz 1f\n\t"
|
||||
"movl $-1,%0\n"
|
||||
"1:" : "=r" (r) : "rm" (x));
|
||||
return r + 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* __ffs - find first set bit in word
|
||||
* @word: The word to search
|
||||
*
|
||||
* Undefined if no bit exists, so code should check against 0 first.
|
||||
*/
|
||||
static inline unsigned long __ffs(unsigned long word)
|
||||
{
|
||||
asm("bsf %1,%0"
|
||||
: "=r" (word)
|
||||
: "rm" (word));
|
||||
return word;
|
||||
}
|
||||
|
||||
/**
|
||||
* ffz - find first zero bit in word
|
||||
* @word: The word to search
|
||||
*
|
||||
* Undefined if no zero exists, so code should check against ~0UL first.
|
||||
*/
|
||||
static inline unsigned long ffz(unsigned long word)
|
||||
{
|
||||
asm("bsf %1,%0"
|
||||
: "=r" (word)
|
||||
: "r" (~word));
|
||||
return word;
|
||||
}
|
||||
|
||||
|
||||
#define ADDR (*(volatile long *)addr)
|
||||
|
||||
static inline void set_bit(int nr, volatile unsigned long *addr)
|
||||
{
|
||||
asm volatile("lock; btsl %1,%0"
|
||||
: "+m" (ADDR)
|
||||
: "Ir" (nr)
|
||||
: "memory");
|
||||
}
|
||||
|
||||
static inline void clear_bit(int nr, volatile unsigned long *addr)
|
||||
{
|
||||
asm volatile("lock; btrl %1,%0"
|
||||
: "+m" (ADDR)
|
||||
: "Ir" (nr)
|
||||
: "memory");
|
||||
}
|
||||
|
||||
#endif
|
||||
67
arch/x86/kernel/include/arch-futex.h
Normal file
67
arch/x86/kernel/include/arch-futex.h
Normal file
@ -0,0 +1,67 @@
|
||||
/**
|
||||
* \file futex.h
|
||||
* Licence details are found in the file LICENSE.
|
||||
*
|
||||
* \brief
|
||||
* Futex adaptation to McKernel
|
||||
*
|
||||
* \author Balazs Gerofi <bgerofi@riken.jp> \par
|
||||
* Copyright (C) 2012 RIKEN AICS
|
||||
*
|
||||
*
|
||||
* HISTORY:
|
||||
*
|
||||
*/
|
||||
#ifndef _ARCH_FUTEX_H
|
||||
#define _ARCH_FUTEX_H
|
||||
#include <asm.h>
|
||||
|
||||
#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
|
||||
asm volatile("1:\t" insn "\n" \
|
||||
"2:\t.section .fixup,\"ax\"\n" \
|
||||
"3:\tmov\t%3, %1\n" \
|
||||
"\tjmp\t2b\n" \
|
||||
"\t.previous\n" \
|
||||
_ASM_EXTABLE(1b, 3b) \
|
||||
: "=r" (oldval), "=r" (ret), "+m" (*uaddr) \
|
||||
: "i" (-EFAULT), "0" (oparg), "1" (0))
|
||||
|
||||
#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
|
||||
asm volatile("1:\tmovl %2, %0\n" \
|
||||
"\tmovl\t%0, %3\n" \
|
||||
"\t" insn "\n" \
|
||||
"2:\tlock; cmpxchgl %3, %2\n" \
|
||||
"\tjnz\t1b\n" \
|
||||
"3:\t.section .fixup,\"ax\"\n" \
|
||||
"4:\tmov\t%5, %1\n" \
|
||||
"\tjmp\t3b\n" \
|
||||
"\t.previous\n" \
|
||||
_ASM_EXTABLE(1b, 4b) \
|
||||
_ASM_EXTABLE(2b, 4b) \
|
||||
: "=&a" (oldval), "=&r" (ret), \
|
||||
"+m" (*uaddr), "=&r" (tem) \
|
||||
: "r" (oparg), "i" (-EFAULT), "1" (0))
|
||||
|
||||
static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
|
||||
int newval)
|
||||
{
|
||||
#ifdef __UACCESS__
|
||||
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
|
||||
return -EFAULT;
|
||||
#endif
|
||||
|
||||
asm volatile("1:\tlock; cmpxchgl %3, %1\n"
|
||||
"2:\t.section .fixup, \"ax\"\n"
|
||||
"3:\tmov %2, %0\n"
|
||||
"\tjmp 2b\n"
|
||||
"\t.previous\n"
|
||||
_ASM_EXTABLE(1b, 3b)
|
||||
: "=a" (oldval), "+m" (*uaddr)
|
||||
: "i" (-EFAULT), "r" (newval), "0" (oldval)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
return oldval;
|
||||
}
|
||||
|
||||
#endif
|
||||
@ -5,15 +5,20 @@
|
||||
#define __HEADER_X86_COMMON_ARCH_LOCK
|
||||
|
||||
#include <ihk/cpu.h>
|
||||
#include <ihk/atomic.h>
|
||||
|
||||
//#define DEBUG_SPINLOCK
|
||||
//#define DEBUG_MCS_RWLOCK
|
||||
|
||||
#ifdef DEBUG_SPINLOCK
|
||||
#if defined(DEBUG_SPINLOCK) || defined(DEBUG_MCS_RWLOCK)
|
||||
int __kprintf(const char *format, ...);
|
||||
#endif
|
||||
|
||||
typedef int ihk_spinlock_t;
|
||||
|
||||
extern void preempt_enable(void);
|
||||
extern void preempt_disable(void);
|
||||
|
||||
#define IHK_STATIC_SPINLOCK_FUNCS
|
||||
|
||||
static void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
|
||||
@ -22,7 +27,17 @@ static void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
|
||||
}
|
||||
#define SPIN_LOCK_UNLOCKED 0
|
||||
|
||||
static void ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
|
||||
#ifdef DEBUG_SPINLOCK
|
||||
#define ihk_mc_spinlock_lock_noirq(l) { \
|
||||
__kprintf("[%d] call ihk_mc_spinlock_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__ihk_mc_spinlock_lock_noirq(l); \
|
||||
__kprintf("[%d] ret ihk_mc_spinlock_lock_noirq\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define ihk_mc_spinlock_lock_noirq __ihk_mc_spinlock_lock_noirq
|
||||
#endif
|
||||
|
||||
static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
|
||||
{
|
||||
int inc = 0x00010000;
|
||||
int tmp;
|
||||
@ -41,10 +56,8 @@ static void ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
|
||||
: "+Q" (inc), "+m" (*lock), "=r" (tmp) : : "memory", "cc");
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG_SPINLOCK
|
||||
__kprintf("[%d] trying to grab lock: 0x%lX\n",
|
||||
ihk_mc_get_processor_id(), lock);
|
||||
#endif
|
||||
preempt_disable();
|
||||
|
||||
asm volatile("lock; xaddl %0, %1\n"
|
||||
"movzwl %w0, %2\n\t"
|
||||
"shrl $16, %0\n\t"
|
||||
@ -60,36 +73,455 @@ static void ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
|
||||
:
|
||||
: "memory", "cc");
|
||||
|
||||
#ifdef DEBUG_SPINLOCK
|
||||
__kprintf("[%d] holding lock: 0x%lX\n", ihk_mc_get_processor_id(), lock);
|
||||
#endif
|
||||
}
|
||||
|
||||
static unsigned long ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
|
||||
#ifdef DEBUG_SPINLOCK
|
||||
#define ihk_mc_spinlock_lock(l) ({ unsigned long rc;\
|
||||
__kprintf("[%d] call ihk_mc_spinlock_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
rc = __ihk_mc_spinlock_lock(l);\
|
||||
__kprintf("[%d] ret ihk_mc_spinlock_lock\n", ihk_mc_get_processor_id()); rc;\
|
||||
})
|
||||
#else
|
||||
#define ihk_mc_spinlock_lock __ihk_mc_spinlock_lock
|
||||
#endif
|
||||
static unsigned long __ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
flags = cpu_disable_interrupt_save();
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(lock);
|
||||
__ihk_mc_spinlock_lock_noirq(lock);
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
static void ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
|
||||
#ifdef DEBUG_SPINLOCK
|
||||
#define ihk_mc_spinlock_unlock_noirq(l) { \
|
||||
__kprintf("[%d] call ihk_mc_spinlock_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__ihk_mc_spinlock_unlock_noirq(l); \
|
||||
__kprintf("[%d] ret ihk_mc_spinlock_unlock_noirq\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define ihk_mc_spinlock_unlock_noirq __ihk_mc_spinlock_unlock_noirq
|
||||
#endif
|
||||
static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
|
||||
{
|
||||
asm volatile ("lock incw %0" : "+m"(*lock) : : "memory", "cc");
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static void ihk_mc_spinlock_unlock(ihk_spinlock_t *lock, unsigned long flags)
|
||||
#ifdef DEBUG_SPINLOCK
|
||||
#define ihk_mc_spinlock_unlock(l, f) { \
|
||||
__kprintf("[%d] call ihk_mc_spinlock_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__ihk_mc_spinlock_unlock((l), (f)); \
|
||||
__kprintf("[%d] ret ihk_mc_spinlock_unlock\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define ihk_mc_spinlock_unlock __ihk_mc_spinlock_unlock
|
||||
#endif
|
||||
static void __ihk_mc_spinlock_unlock(ihk_spinlock_t *lock, unsigned long flags)
|
||||
{
|
||||
ihk_mc_spinlock_unlock_noirq(lock);
|
||||
__ihk_mc_spinlock_unlock_noirq(lock);
|
||||
|
||||
cpu_restore_interrupt(flags);
|
||||
#ifdef DEBUG_SPINLOCK
|
||||
__kprintf("[%d] released lock: 0x%lX\n", ihk_mc_get_processor_id(), lock);
|
||||
}
|
||||
|
||||
/* An implementation of the Mellor-Crummey Scott (MCS) lock */
|
||||
typedef struct mcs_lock_node {
|
||||
unsigned long locked;
|
||||
struct mcs_lock_node *next;
|
||||
} __attribute__((aligned(64))) mcs_lock_node_t;
|
||||
|
||||
static void mcs_lock_init(struct mcs_lock_node *node)
|
||||
{
|
||||
node->locked = 0;
|
||||
node->next = NULL;
|
||||
}
|
||||
|
||||
static void mcs_lock_lock(struct mcs_lock_node *lock,
|
||||
struct mcs_lock_node *node)
|
||||
{
|
||||
struct mcs_lock_node *pred;
|
||||
|
||||
node->next = NULL;
|
||||
node->locked = 0;
|
||||
pred = (struct mcs_lock_node *)xchg8((unsigned long *)&lock->next,
|
||||
(unsigned long)node);
|
||||
|
||||
if (pred) {
|
||||
node->locked = 1;
|
||||
pred->next = node;
|
||||
while (node->locked != 0) {
|
||||
cpu_pause();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void mcs_lock_unlock(struct mcs_lock_node *lock,
|
||||
struct mcs_lock_node *node)
|
||||
{
|
||||
if (node->next == NULL) {
|
||||
struct mcs_lock_node *old = (struct mcs_lock_node *)
|
||||
atomic_cmpxchg8((unsigned long *)&lock->next,
|
||||
(unsigned long)node, (unsigned long)0);
|
||||
|
||||
if (old == node) {
|
||||
return;
|
||||
}
|
||||
|
||||
while (node->next == NULL) {
|
||||
cpu_pause();
|
||||
}
|
||||
}
|
||||
|
||||
node->next->locked = 0;
|
||||
}
|
||||
|
||||
// reader/writer lock
|
||||
typedef struct mcs_rwlock_node {
|
||||
ihk_atomic_t count; // num of readers (use only common reader)
|
||||
char type; // lock type
|
||||
#define MCS_RWLOCK_TYPE_COMMON_READER 0
|
||||
#define MCS_RWLOCK_TYPE_READER 1
|
||||
#define MCS_RWLOCK_TYPE_WRITER 2
|
||||
char locked; // lock
|
||||
#define MCS_RWLOCK_LOCKED 1
|
||||
#define MCS_RWLOCK_UNLOCKED 0
|
||||
char dmy1; // unused
|
||||
char dmy2; // unused
|
||||
struct mcs_rwlock_node *next;
|
||||
} __attribute__((aligned(64))) mcs_rwlock_node_t;
|
||||
|
||||
typedef struct mcs_rwlock_node_irqsave {
|
||||
struct mcs_rwlock_node node;
|
||||
unsigned long irqsave;
|
||||
} __attribute__((aligned(64))) mcs_rwlock_node_irqsave_t;
|
||||
|
||||
typedef struct mcs_rwlock_lock {
|
||||
struct mcs_rwlock_node reader; /* common reader lock */
|
||||
struct mcs_rwlock_node *node; /* base */
|
||||
} __attribute__((aligned(64))) mcs_rwlock_lock_t;
|
||||
|
||||
static void
|
||||
mcs_rwlock_init(struct mcs_rwlock_lock *lock)
|
||||
{
|
||||
ihk_atomic_set(&lock->reader.count, 0);
|
||||
lock->reader.type = MCS_RWLOCK_TYPE_COMMON_READER;
|
||||
lock->node = NULL;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_MCS_RWLOCK
|
||||
#define mcs_rwlock_writer_lock_noirq(l, n) { \
|
||||
__kprintf("[%d] call mcs_rwlock_writer_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__mcs_rwlock_writer_lock_noirq((l), (n)); \
|
||||
__kprintf("[%d] ret mcs_rwlock_writer_lock_noirq\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define mcs_rwlock_writer_lock_noirq __mcs_rwlock_writer_lock_noirq
|
||||
#endif
|
||||
static void
|
||||
__mcs_rwlock_writer_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
|
||||
{
|
||||
struct mcs_rwlock_node *pred;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
node->type = MCS_RWLOCK_TYPE_WRITER;
|
||||
node->next = NULL;
|
||||
|
||||
pred = (struct mcs_rwlock_node *)xchg8((unsigned long *)&lock->node,
|
||||
(unsigned long)node);
|
||||
|
||||
if (pred) {
|
||||
node->locked = MCS_RWLOCK_LOCKED;
|
||||
pred->next = node;
|
||||
while (node->locked != MCS_RWLOCK_UNLOCKED) {
|
||||
cpu_pause();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
mcs_rwlock_unlock_readers(struct mcs_rwlock_lock *lock)
|
||||
{
|
||||
struct mcs_rwlock_node *p;
|
||||
struct mcs_rwlock_node *f = NULL;
|
||||
struct mcs_rwlock_node *n;
|
||||
int breakf = 0;
|
||||
|
||||
ihk_atomic_inc(&lock->reader.count); // protect to unlock reader
|
||||
for(p = &lock->reader; p->next; p = n){
|
||||
n = p->next;
|
||||
if(p->next->type == MCS_RWLOCK_TYPE_READER){
|
||||
p->next = n->next;
|
||||
if(lock->node == n){
|
||||
struct mcs_rwlock_node *old;
|
||||
|
||||
old = (struct mcs_rwlock_node *)atomic_cmpxchg8(
|
||||
(unsigned long *)&lock->node,
|
||||
(unsigned long)n,
|
||||
(unsigned long)p);
|
||||
|
||||
if(old != n){ // couldn't change
|
||||
while (n->next == NULL) {
|
||||
cpu_pause();
|
||||
}
|
||||
p->next = n->next;
|
||||
}
|
||||
else{
|
||||
breakf = 1;
|
||||
}
|
||||
}
|
||||
else if(p->next == NULL){
|
||||
while (n->next == NULL) {
|
||||
cpu_pause();
|
||||
}
|
||||
p->next = n->next;
|
||||
}
|
||||
if(f){
|
||||
ihk_atomic_inc(&lock->reader.count);
|
||||
n->locked = MCS_RWLOCK_UNLOCKED;
|
||||
}
|
||||
else
|
||||
f = n;
|
||||
n = p;
|
||||
if(breakf)
|
||||
break;
|
||||
}
|
||||
if(n->next == NULL && lock->node != n){
|
||||
while (n->next == NULL && lock->node != n) {
|
||||
cpu_pause();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
f->locked = MCS_RWLOCK_UNLOCKED;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_MCS_RWLOCK
|
||||
#define mcs_rwlock_writer_unlock_noirq(l, n) { \
|
||||
__kprintf("[%d] call mcs_rwlock_writer_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__mcs_rwlock_writer_unlock_noirq((l), (n)); \
|
||||
__kprintf("[%d] ret mcs_rwlock_writer_unlock_noirq\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define mcs_rwlock_writer_unlock_noirq __mcs_rwlock_writer_unlock_noirq
|
||||
#endif
|
||||
static void
|
||||
__mcs_rwlock_writer_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
|
||||
{
|
||||
if (node->next == NULL) {
|
||||
struct mcs_rwlock_node *old = (struct mcs_rwlock_node *)
|
||||
atomic_cmpxchg8((unsigned long *)&lock->node,
|
||||
(unsigned long)node, (unsigned long)0);
|
||||
|
||||
if (old == node) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
while (node->next == NULL) {
|
||||
cpu_pause();
|
||||
}
|
||||
}
|
||||
|
||||
if(node->next->type == MCS_RWLOCK_TYPE_READER){
|
||||
lock->reader.next = node->next;
|
||||
mcs_rwlock_unlock_readers(lock);
|
||||
}
|
||||
else{
|
||||
node->next->locked = MCS_RWLOCK_UNLOCKED;
|
||||
}
|
||||
|
||||
out:
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
#ifdef DEBUG_MCS_RWLOCK
|
||||
#define mcs_rwlock_reader_lock_noirq(l, n) { \
|
||||
__kprintf("[%d] call mcs_rwlock_reader_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__mcs_rwlock_reader_lock_noirq((l), (n)); \
|
||||
__kprintf("[%d] ret mcs_rwlock_reader_lock_noirq\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define mcs_rwlock_reader_lock_noirq __mcs_rwlock_reader_lock_noirq
|
||||
#endif
|
||||
|
||||
static inline unsigned int
|
||||
atomic_inc_ifnot0(ihk_atomic_t *v)
|
||||
{
|
||||
unsigned int *p = (unsigned int *)(&(v)->counter);
|
||||
unsigned int old;
|
||||
unsigned int new;
|
||||
unsigned int val;
|
||||
|
||||
do{
|
||||
if(!(old = *p))
|
||||
break;
|
||||
new = old + 1;
|
||||
val = atomic_cmpxchg4(p, old, new);
|
||||
}while(val != old);
|
||||
return old;
|
||||
}
|
||||
|
||||
static void
|
||||
__mcs_rwlock_reader_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
|
||||
{
|
||||
struct mcs_rwlock_node *pred;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
node->type = MCS_RWLOCK_TYPE_READER;
|
||||
node->next = NULL;
|
||||
node->dmy1 = ihk_mc_get_processor_id();
|
||||
|
||||
pred = (struct mcs_rwlock_node *)xchg8((unsigned long *)&lock->node,
|
||||
(unsigned long)node);
|
||||
|
||||
if (pred) {
|
||||
if(pred == &lock->reader){
|
||||
if(atomic_inc_ifnot0(&pred->count)){
|
||||
struct mcs_rwlock_node *old;
|
||||
|
||||
old = (struct mcs_rwlock_node *)atomic_cmpxchg8(
|
||||
(unsigned long *)&lock->node,
|
||||
(unsigned long)node,
|
||||
(unsigned long)pred);
|
||||
|
||||
if (old == node) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
while (node->next == NULL) {
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
node->locked = MCS_RWLOCK_LOCKED;
|
||||
lock->reader.next = node;
|
||||
mcs_rwlock_unlock_readers(lock);
|
||||
ihk_atomic_dec(&pred->count);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
node->locked = MCS_RWLOCK_LOCKED;
|
||||
pred->next = node;
|
||||
while (node->locked != MCS_RWLOCK_UNLOCKED) {
|
||||
cpu_pause();
|
||||
}
|
||||
}
|
||||
else {
|
||||
lock->reader.next = node;
|
||||
mcs_rwlock_unlock_readers(lock);
|
||||
}
|
||||
out:
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_MCS_RWLOCK
|
||||
#define mcs_rwlock_reader_unlock_noirq(l, n) { \
|
||||
__kprintf("[%d] call mcs_rwlock_reader_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__mcs_rwlock_reader_unlock_noirq((l), (n)); \
|
||||
__kprintf("[%d] ret mcs_rwlock_reader_unlock_noirq\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define mcs_rwlock_reader_unlock_noirq __mcs_rwlock_reader_unlock_noirq
|
||||
#endif
|
||||
static void
|
||||
__mcs_rwlock_reader_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
|
||||
{
|
||||
if(ihk_atomic_dec_return(&lock->reader.count))
|
||||
goto out;
|
||||
|
||||
if (lock->reader.next == NULL) {
|
||||
struct mcs_rwlock_node *old;
|
||||
|
||||
old = (struct mcs_rwlock_node *)atomic_cmpxchg8(
|
||||
(unsigned long *)&lock->node,
|
||||
(unsigned long)&lock->reader,
|
||||
(unsigned long)0);
|
||||
|
||||
if (old == &lock->reader) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
while (lock->reader.next == NULL) {
|
||||
cpu_pause();
|
||||
}
|
||||
}
|
||||
|
||||
if(lock->reader.next->type == MCS_RWLOCK_TYPE_READER){
|
||||
mcs_rwlock_unlock_readers(lock);
|
||||
}
|
||||
else{
|
||||
lock->reader.next->locked = MCS_RWLOCK_UNLOCKED;
|
||||
}
|
||||
|
||||
out:
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
#ifdef DEBUG_MCS_RWLOCK
|
||||
#define mcs_rwlock_writer_lock(l, n) { \
|
||||
__kprintf("[%d] call mcs_rwlock_writer_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__mcs_rwlock_writer_lock((l), (n)); \
|
||||
__kprintf("[%d] ret mcs_rwlock_writer_lock\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define mcs_rwlock_writer_lock __mcs_rwlock_writer_lock
|
||||
#endif
|
||||
static void
|
||||
__mcs_rwlock_writer_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
|
||||
{
|
||||
node->irqsave = cpu_disable_interrupt_save();
|
||||
__mcs_rwlock_writer_lock_noirq(lock, &node->node);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_MCS_RWLOCK
|
||||
#define mcs_rwlock_writer_unlock(l, n) { \
|
||||
__kprintf("[%d] call mcs_rwlock_writer_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__mcs_rwlock_writer_unlock((l), (n)); \
|
||||
__kprintf("[%d] ret mcs_rwlock_writer_unlock\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define mcs_rwlock_writer_unlock __mcs_rwlock_writer_unlock
|
||||
#endif
|
||||
static void
|
||||
__mcs_rwlock_writer_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
|
||||
{
|
||||
__mcs_rwlock_writer_unlock_noirq(lock, &node->node);
|
||||
cpu_restore_interrupt(node->irqsave);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_MCS_RWLOCK
|
||||
#define mcs_rwlock_reader_lock(l, n) { \
|
||||
__kprintf("[%d] call mcs_rwlock_reader_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__mcs_rwlock_reader_lock((l), (n)); \
|
||||
__kprintf("[%d] ret mcs_rwlock_reader_lock\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define mcs_rwlock_reader_lock __mcs_rwlock_reader_lock
|
||||
#endif
|
||||
static void
|
||||
__mcs_rwlock_reader_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
|
||||
{
|
||||
node->irqsave = cpu_disable_interrupt_save();
|
||||
__mcs_rwlock_reader_lock_noirq(lock, &node->node);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_MCS_RWLOCK
|
||||
#define mcs_rwlock_reader_unlock(l, n) { \
|
||||
__kprintf("[%d] call mcs_rwlock_reader_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
|
||||
__mcs_rwlock_reader_unlock((l), (n)); \
|
||||
__kprintf("[%d] ret mcs_rwlock_reader_unlock\n", ihk_mc_get_processor_id()); \
|
||||
}
|
||||
#else
|
||||
#define mcs_rwlock_reader_unlock __mcs_rwlock_reader_unlock
|
||||
#endif
|
||||
static void
|
||||
__mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
|
||||
{
|
||||
__mcs_rwlock_reader_unlock_noirq(lock, &node->node);
|
||||
cpu_restore_interrupt(node->irqsave);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@ -5,6 +5,8 @@
|
||||
* Define and declare memory management macros and functions
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
@ -20,6 +22,7 @@
|
||||
#define USER_CS_ENTRY 6
|
||||
#define USER_DS_ENTRY 7
|
||||
#define GLOBAL_TSS_ENTRY 8
|
||||
#define GETCPU_ENTRY 15
|
||||
|
||||
#define KERNEL_CS (KERNEL_CS_ENTRY * 8)
|
||||
#define KERNEL_DS (KERNEL_DS_ENTRY * 8)
|
||||
@ -38,10 +41,12 @@
|
||||
#define LARGE_PAGE_P2ALIGN (LARGE_PAGE_SHIFT - PAGE_SHIFT)
|
||||
|
||||
#define USER_END 0x0000800000000000UL
|
||||
#define TASK_UNMAPPED_BASE 0x00002AAAAAA00000UL
|
||||
#define MAP_ST_START 0xffff800000000000UL
|
||||
#define MAP_VMAP_START 0xfffff00000000000UL
|
||||
#define MAP_FIXED_START 0xffffffff70000000UL
|
||||
#define MAP_KERNEL_START 0xffffffff80000000UL
|
||||
#define STACK_TOP(region) ((region)->user_end)
|
||||
|
||||
#define MAP_VMAP_SIZE 0x0000000100000000UL
|
||||
|
||||
@ -63,6 +68,8 @@
|
||||
|
||||
#define PF_PRESENT ((pte_t)0x01) /* entry is valid */
|
||||
#define PF_WRITABLE ((pte_t)0x02)
|
||||
#define PFLX_PWT ((pte_t)0x08)
|
||||
#define PFLX_PCD ((pte_t)0x10)
|
||||
#define PF_SIZE ((pte_t)0x80) /* entry points large page */
|
||||
|
||||
#define PFL4_PRESENT ((pte_t)0x01)
|
||||
@ -72,8 +79,8 @@
|
||||
#define PFL3_PRESENT ((pte_t)0x01)
|
||||
#define PFL3_WRITABLE ((pte_t)0x02)
|
||||
#define PFL3_USER ((pte_t)0x04)
|
||||
#define PFL3_PWT ((pte_t)0x08)
|
||||
#define PFL3_PCD ((pte_t)0x10)
|
||||
#define PFL3_PWT PFLX_PWT
|
||||
#define PFL3_PCD PFLX_PCD
|
||||
#define PFL3_ACCESSED ((pte_t)0x20)
|
||||
#define PFL3_DIRTY ((pte_t)0x40)
|
||||
#define PFL3_SIZE ((pte_t)0x80) /* Used in 1G page */
|
||||
@ -84,8 +91,8 @@
|
||||
#define PFL2_PRESENT ((pte_t)0x01)
|
||||
#define PFL2_WRITABLE ((pte_t)0x02)
|
||||
#define PFL2_USER ((pte_t)0x04)
|
||||
#define PFL2_PWT ((pte_t)0x08)
|
||||
#define PFL2_PCD ((pte_t)0x10)
|
||||
#define PFL2_PWT PFLX_PWT
|
||||
#define PFL2_PCD PFLX_PCD
|
||||
#define PFL2_ACCESSED ((pte_t)0x20)
|
||||
#define PFL2_DIRTY ((pte_t)0x40)
|
||||
#define PFL2_SIZE ((pte_t)0x80) /* Used in 2M page */
|
||||
@ -96,8 +103,8 @@
|
||||
#define PFL1_PRESENT ((pte_t)0x01)
|
||||
#define PFL1_WRITABLE ((pte_t)0x02)
|
||||
#define PFL1_USER ((pte_t)0x04)
|
||||
#define PFL1_PWT ((pte_t)0x08)
|
||||
#define PFL1_PCD ((pte_t)0x10)
|
||||
#define PFL1_PWT PFLX_PWT
|
||||
#define PFL1_PCD PFLX_PCD
|
||||
#define PFL1_ACCESSED ((pte_t)0x20)
|
||||
#define PFL1_DIRTY ((pte_t)0x40)
|
||||
#define PFL1_IGNORED_11 ((pte_t)1 << 11)
|
||||
@ -117,6 +124,25 @@
|
||||
#define PTE_NULL ((pte_t)0)
|
||||
typedef unsigned long pte_t;
|
||||
|
||||
/*
|
||||
* pagemap kernel ABI bits
|
||||
*/
|
||||
#define PM_ENTRY_BYTES sizeof(uint64_t)
|
||||
#define PM_STATUS_BITS 3
|
||||
#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
|
||||
#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
|
||||
#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
|
||||
#define PM_PSHIFT_BITS 6
|
||||
#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
|
||||
#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
|
||||
#define PM_PSHIFT(x) (((uint64_t) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
|
||||
#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
|
||||
#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
|
||||
|
||||
#define PM_PRESENT PM_STATUS(4LL)
|
||||
#define PM_SWAP PM_STATUS(2LL)
|
||||
|
||||
|
||||
/* For easy conversion, it is better to be the same as architecture's ones */
|
||||
enum ihk_mc_pt_attribute {
|
||||
PTATTR_ACTIVE = 0x01,
|
||||
@ -128,8 +154,11 @@ enum ihk_mc_pt_attribute {
|
||||
PTATTR_NO_EXECUTE = 0x8000000000000000,
|
||||
PTATTR_UNCACHABLE = 0x10000,
|
||||
PTATTR_FOR_USER = 0x20000,
|
||||
PTATTR_WRITE_COMBINED = 0x40000,
|
||||
};
|
||||
|
||||
enum ihk_mc_pt_attribute attr_mask;
|
||||
|
||||
static inline int pte_is_null(pte_t *ptep)
|
||||
{
|
||||
return (*ptep == PTE_NULL);
|
||||
@ -185,6 +214,33 @@ static inline off_t pte_get_off(pte_t *ptep, size_t pgsize)
|
||||
return (off_t)(*ptep & PAGE_MASK);
|
||||
}
|
||||
|
||||
static inline enum ihk_mc_pt_attribute pte_get_attr(pte_t *ptep, size_t pgsize)
|
||||
{
|
||||
enum ihk_mc_pt_attribute attr;
|
||||
|
||||
attr = *ptep & attr_mask;
|
||||
if (*ptep & PFLX_PWT) {
|
||||
if (*ptep & PFLX_PCD) {
|
||||
attr |= PTATTR_UNCACHABLE;
|
||||
}
|
||||
else {
|
||||
attr |= PTATTR_WRITE_COMBINED;
|
||||
}
|
||||
}
|
||||
if (((pgsize == PTL2_SIZE) && (*ptep & PFL2_SIZE))
|
||||
|| ((pgsize == PTL3_SIZE) && (*ptep & PFL3_SIZE))) {
|
||||
attr |= PTATTR_LARGEPAGE;
|
||||
}
|
||||
|
||||
return attr;
|
||||
} /* pte_get_attr() */
|
||||
|
||||
static inline void pte_make_null(pte_t *ptep, size_t pgsize)
|
||||
{
|
||||
*ptep = PTE_NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
static inline void pte_make_fileoff(off_t off,
|
||||
enum ihk_mc_pt_attribute ptattr, size_t pgsize, pte_t *ptep)
|
||||
{
|
||||
@ -216,20 +272,51 @@ static inline void pte_xchg(pte_t *ptep, pte_t *valp)
|
||||
#define pte_xchg(p,vp) do { *(vp) = xchg((p), *(vp)); } while (0)
|
||||
#endif
|
||||
|
||||
static inline void pte_clear_dirty(pte_t *ptep, size_t pgsize)
|
||||
{
|
||||
uint64_t mask;
|
||||
|
||||
switch (pgsize) {
|
||||
default: /* through */
|
||||
case PTL1_SIZE: mask = ~PFL1_DIRTY; break;
|
||||
case PTL2_SIZE: mask = ~PFL2_DIRTY; break;
|
||||
case PTL3_SIZE: mask = ~PFL3_DIRTY; break;
|
||||
}
|
||||
|
||||
asm volatile ("lock andq %0,%1" :: "r"(mask), "m"(*ptep));
|
||||
return;
|
||||
}
|
||||
|
||||
static inline void pte_set_dirty(pte_t *ptep, size_t pgsize)
|
||||
{
|
||||
uint64_t mask;
|
||||
|
||||
switch (pgsize) {
|
||||
default: /* through */
|
||||
case PTL1_SIZE: mask = PFL1_DIRTY; break;
|
||||
case PTL2_SIZE: mask = PFL2_DIRTY; break;
|
||||
case PTL3_SIZE: mask = PFL3_DIRTY; break;
|
||||
}
|
||||
|
||||
asm volatile ("lock orq %0,%1" :: "r"(mask), "m"(*ptep));
|
||||
return;
|
||||
}
|
||||
|
||||
struct page_table;
|
||||
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
|
||||
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);
|
||||
|
||||
void *early_alloc_page(void);
|
||||
void *early_alloc_pages(int nr_pages);
|
||||
void *get_last_early_heap(void);
|
||||
void flush_tlb(void);
|
||||
void flush_tlb_single(unsigned long addr);
|
||||
|
||||
void *map_fixed_area(unsigned long phys, unsigned long size, int uncachable);
|
||||
|
||||
#define AP_TRAMPOLINE 0x10000
|
||||
#define AP_TRAMPOLINE_SIZE 0x4000
|
||||
extern unsigned long ap_trampoline;
|
||||
//#define AP_TRAMPOLINE 0x10000
|
||||
#define AP_TRAMPOLINE_SIZE 0x2000
|
||||
|
||||
/* Local is cachable */
|
||||
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE | PTATTR_UNCACHABLE)
|
||||
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE)
|
||||
#endif
|
||||
|
||||
23
arch/x86/kernel/include/arch-string.h
Normal file
23
arch/x86/kernel/include/arch-string.h
Normal file
@ -0,0 +1,23 @@
|
||||
#ifndef _ASM_X86_STRING_H
|
||||
#define _ASM_X86_STRING_H
|
||||
|
||||
#define ARCH_FAST_MEMCPY
|
||||
|
||||
static inline void *__inline_memcpy(void *to, const void *from, size_t n)
|
||||
{
|
||||
unsigned long d0, d1, d2;
|
||||
asm volatile("rep ; movsl\n\t"
|
||||
"testb $2,%b4\n\t"
|
||||
"je 1f\n\t"
|
||||
"movsw\n"
|
||||
"1:\ttestb $1,%b4\n\t"
|
||||
"je 2f\n\t"
|
||||
"movsb\n"
|
||||
"2:"
|
||||
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
|
||||
: "0" (n / 4), "q" (n), "1" ((long)to), "2" ((long)from)
|
||||
: "memory");
|
||||
return to;
|
||||
}
|
||||
|
||||
#endif
|
||||
18
arch/x86/kernel/include/arch/auxvec.h
Normal file
18
arch/x86/kernel/include/arch/auxvec.h
Normal file
@ -0,0 +1,18 @@
|
||||
/**
|
||||
* \file auxvec.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* Declare architecture-dependent constants for auxiliary vector
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
|
||||
* Copyright (C) 2016 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
*/
|
||||
|
||||
#ifndef ARCH_AUXVEC_H
|
||||
#define ARCH_AUXVEC_H
|
||||
|
||||
#define AT_SYSINFO_EHDR 33
|
||||
|
||||
#endif
|
||||
37
arch/x86/kernel/include/arch/cpu.h
Normal file
37
arch/x86/kernel/include/arch/cpu.h
Normal file
@ -0,0 +1,37 @@
|
||||
/**
|
||||
* \file cpu.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* Declare architecture-dependent types and functions to control CPU.
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
*/
|
||||
|
||||
#ifndef ARCH_CPU_H
|
||||
#define ARCH_CPU_H
|
||||
|
||||
#include <ihk/cpu.h>
|
||||
|
||||
static inline void rmb(void)
|
||||
{
|
||||
barrier();
|
||||
}
|
||||
|
||||
static inline void wmb(void)
|
||||
{
|
||||
barrier();
|
||||
}
|
||||
|
||||
static unsigned long read_tsc(void)
|
||||
{
|
||||
unsigned int low, high;
|
||||
|
||||
asm volatile("rdtsc" : "=a"(low), "=d"(high));
|
||||
|
||||
return (low | ((unsigned long)high << 32));
|
||||
}
|
||||
|
||||
#endif /* ARCH_CPU_H */
|
||||
16
arch/x86/kernel/include/arch/mm.h
Normal file
16
arch/x86/kernel/include/arch/mm.h
Normal file
@ -0,0 +1,16 @@
|
||||
#ifndef __ARCH_MM_H
|
||||
#define __ARCH_MM_H
|
||||
|
||||
struct process_vm;
|
||||
|
||||
static inline void
|
||||
flush_nfo_tlb()
|
||||
{
|
||||
}
|
||||
|
||||
static inline void
|
||||
flush_nfo_tlb_mm(struct process_vm *vm)
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
40
arch/x86/kernel/include/arch/mman.h
Normal file
40
arch/x86/kernel/include/arch/mman.h
Normal file
@ -0,0 +1,40 @@
|
||||
/**
|
||||
* \file mman.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* memory management declarations
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#ifndef HEADER_ARCH_MMAN_H
|
||||
#define HEADER_ARCH_MMAN_H
|
||||
|
||||
/*
|
||||
* mapping flags
|
||||
*/
|
||||
#define MAP_32BIT 0x40
|
||||
#define MAP_GROWSDOWN 0x0100
|
||||
#define MAP_DENYWRITE 0x0800
|
||||
#define MAP_EXECUTABLE 0x1000
|
||||
#define MAP_LOCKED 0x2000
|
||||
#define MAP_NORESERVE 0x4000
|
||||
#define MAP_POPULATE 0x8000
|
||||
#define MAP_NONBLOCK 0x00010000
|
||||
#define MAP_STACK 0x00020000
|
||||
#define MAP_HUGETLB 0x00040000
|
||||
|
||||
#define MAP_HUGE_SHIFT 26
|
||||
#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
|
||||
#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
|
||||
|
||||
/*
|
||||
* for mlockall()
|
||||
*/
|
||||
#define MCL_CURRENT 0x01
|
||||
#define MCL_FUTURE 0x02
|
||||
|
||||
#endif /* HEADER_ARCH_MMAN_H */
|
||||
46
arch/x86/kernel/include/arch/shm.h
Normal file
46
arch/x86/kernel/include/arch/shm.h
Normal file
@ -0,0 +1,46 @@
|
||||
/**
|
||||
* \file shm.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* header file for System V shared memory
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#ifndef HEADER_ARCH_SHM_H
|
||||
#define HEADER_ARCH_SHM_H
|
||||
|
||||
/* shmflg */
|
||||
#define SHM_HUGE_SHIFT 26
|
||||
#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT)
|
||||
#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT)
|
||||
|
||||
struct ipc_perm {
|
||||
key_t key;
|
||||
uid_t uid;
|
||||
gid_t gid;
|
||||
uid_t cuid;
|
||||
gid_t cgid;
|
||||
uint16_t mode;
|
||||
uint8_t padding[2];
|
||||
uint16_t seq;
|
||||
uint8_t padding2[22];
|
||||
};
|
||||
|
||||
struct shmid_ds {
|
||||
struct ipc_perm shm_perm;
|
||||
size_t shm_segsz;
|
||||
time_t shm_atime;
|
||||
time_t shm_dtime;
|
||||
time_t shm_ctime;
|
||||
pid_t shm_cpid;
|
||||
pid_t shm_lpid;
|
||||
uint64_t shm_nattch;
|
||||
uint8_t padding[12];
|
||||
int init_pgshift;
|
||||
};
|
||||
|
||||
#endif /* HEADER_ARCH_SHM_H */
|
||||
@ -22,7 +22,7 @@
|
||||
* - 4096 : kernel stack
|
||||
*/
|
||||
|
||||
#define X86_CPU_LOCAL_OFFSET_TSS 128
|
||||
#define X86_CPU_LOCAL_OFFSET_TSS 176
|
||||
#define X86_CPU_LOCAL_OFFSET_KSTACK 16
|
||||
#define X86_CPU_LOCAL_OFFSET_USTACK 24
|
||||
|
||||
@ -39,10 +39,13 @@ struct x86_cpu_local_variables {
|
||||
struct x86_desc_ptr gdt_ptr;
|
||||
unsigned short pad[3];
|
||||
/* 48 */
|
||||
uint64_t gdt[10];
|
||||
/* 128 */
|
||||
uint64_t gdt[16];
|
||||
/* 176 */
|
||||
struct tss64 tss;
|
||||
|
||||
/* 280 */
|
||||
unsigned long paniced;
|
||||
uint64_t panic_regs[21];
|
||||
/* 456 */
|
||||
} __attribute__((packed));
|
||||
|
||||
struct x86_cpu_local_variables *get_x86_cpu_local_variable(int id);
|
||||
|
||||
@ -1,40 +1,7 @@
|
||||
#ifndef _ASM_GENERIC_ERRNO_BASE_H
|
||||
#define _ASM_GENERIC_ERRNO_BASE_H
|
||||
#ifndef _ERRNO_BASE_H
|
||||
#define _ERRNO_BASE_H
|
||||
|
||||
#define EPERM 1 /* Operation not permitted */
|
||||
#define ENOENT 2 /* No such file or directory */
|
||||
#define ESRCH 3 /* No such process */
|
||||
#define EINTR 4 /* Interrupted system call */
|
||||
#define EIO 5 /* I/O error */
|
||||
#define ENXIO 6 /* No such device or address */
|
||||
#define E2BIG 7 /* Argument list too long */
|
||||
#define ENOEXEC 8 /* Exec format error */
|
||||
#define EBADF 9 /* Bad file number */
|
||||
#define ECHILD 10 /* No child processes */
|
||||
#define EAGAIN 11 /* Try again */
|
||||
#define ENOMEM 12 /* Out of memory */
|
||||
#define EACCES 13 /* Permission denied */
|
||||
#define EFAULT 14 /* Bad address */
|
||||
#define ENOTBLK 15 /* Block device required */
|
||||
#define EBUSY 16 /* Device or resource busy */
|
||||
#define EEXIST 17 /* File exists */
|
||||
#define EXDEV 18 /* Cross-device link */
|
||||
#define ENODEV 19 /* No such device */
|
||||
#define ENOTDIR 20 /* Not a directory */
|
||||
#define EISDIR 21 /* Is a directory */
|
||||
#define EINVAL 22 /* Invalid argument */
|
||||
#define ENFILE 23 /* File table overflow */
|
||||
#define EMFILE 24 /* Too many open files */
|
||||
#define ENOTTY 25 /* Not a typewriter */
|
||||
#define ETXTBSY 26 /* Text file busy */
|
||||
#define EFBIG 27 /* File too large */
|
||||
#define ENOSPC 28 /* No space left on device */
|
||||
#define ESPIPE 29 /* Illegal seek */
|
||||
#define EROFS 30 /* Read-only file system */
|
||||
#define EMLINK 31 /* Too many links */
|
||||
#define EPIPE 32 /* Broken pipe */
|
||||
#define EDOM 33 /* Math argument out of domain of func */
|
||||
#define ERANGE 34 /* Math result not representable */
|
||||
#include <generic-errno.h>
|
||||
|
||||
#define EDEADLK 35 /* Resource deadlock would occur */
|
||||
#define ENAMETOOLONG 36 /* File name too long */
|
||||
@ -141,29 +108,4 @@
|
||||
|
||||
#define ERFKILL 132 /* Operation not possible due to RF-kill */
|
||||
|
||||
|
||||
#ifdef __KERNEL__
|
||||
|
||||
/* Should never be seen by user programs */
|
||||
#define ERESTARTSYS 512
|
||||
#define ERESTARTNOINTR 513
|
||||
#define ERESTARTNOHAND 514 /* restart if no handler.. */
|
||||
#define ENOIOCTLCMD 515 /* No ioctl command */
|
||||
#define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */
|
||||
|
||||
/* Defined for the NFSv3 protocol */
|
||||
#define EBADHANDLE 521 /* Illegal NFS file handle */
|
||||
#define ENOTSYNC 522 /* Update synchronization mismatch */
|
||||
#define EBADCOOKIE 523 /* Cookie is stale */
|
||||
#define ENOTSUPP 524 /* Operation is not supported */
|
||||
#define ETOOSMALL 525 /* Buffer or request is too small */
|
||||
#define ESERVERFAULT 526 /* An untranslatable error occurred */
|
||||
#define EBADTYPE 527 /* Type not supported by server */
|
||||
#define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */
|
||||
#define EIOCBQUEUED 529 /* iocb queued, will get completion event */
|
||||
#define EIOCBRETRY 530 /* iocb queued, will trigger a retry */
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
@ -13,6 +13,10 @@
|
||||
#ifndef HEADER_X86_COMMON_IHK_ATOMIC_H
|
||||
#define HEADER_X86_COMMON_IHK_ATOMIC_H
|
||||
|
||||
/***********************************************************************
|
||||
* ihk_atomic_t
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
int counter;
|
||||
} ihk_atomic_t;
|
||||
@ -95,6 +99,30 @@ static inline int ihk_atomic_sub_return(int i, ihk_atomic_t *v)
|
||||
#define ihk_atomic_inc_return(v) (ihk_atomic_add_return(1, v))
|
||||
#define ihk_atomic_dec_return(v) (ihk_atomic_sub_return(1, v))
|
||||
|
||||
/***********************************************************************
|
||||
* ihk_atomic64_t
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
long counter64;
|
||||
} ihk_atomic64_t;
|
||||
|
||||
#define IHK_ATOMIC64_INIT(i) { .counter64 = (i) }
|
||||
|
||||
static inline long ihk_atomic64_read(const ihk_atomic64_t *v)
|
||||
{
|
||||
return *(volatile long *)&(v)->counter64;
|
||||
}
|
||||
|
||||
static inline void ihk_atomic64_inc(ihk_atomic64_t *v)
|
||||
{
|
||||
asm volatile ("lock incq %0" : "+m"(v->counter64));
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
* others
|
||||
*/
|
||||
|
||||
/*
|
||||
* Note: no "lock" prefix even on SMP: xchg always implies lock anyway
|
||||
* Note 2: xchg has side effect, so that attribute volatile is necessary,
|
||||
@ -112,6 +140,17 @@ static inline int ihk_atomic_sub_return(int i, ihk_atomic_t *v)
|
||||
__x; \
|
||||
})
|
||||
|
||||
static inline unsigned long xchg8(unsigned long *ptr, unsigned long x)
|
||||
{
|
||||
unsigned long __x = (x);
|
||||
asm volatile("xchgq %0,%1"
|
||||
: "=r" (__x)
|
||||
: "m" (*(volatile unsigned long*)(ptr)), "0" (__x)
|
||||
: "memory");
|
||||
|
||||
return __x;
|
||||
}
|
||||
|
||||
#define __xchg(x, ptr, size) \
|
||||
({ \
|
||||
__typeof(*(ptr)) __x = (x); \
|
||||
@ -150,5 +189,30 @@ static inline int ihk_atomic_sub_return(int i, ihk_atomic_t *v)
|
||||
#define xchg(ptr, v) \
|
||||
__xchg((v), (ptr), sizeof(*ptr))
|
||||
|
||||
static inline unsigned long atomic_cmpxchg8(unsigned long *addr,
|
||||
unsigned long oldval,
|
||||
unsigned long newval)
|
||||
{
|
||||
asm volatile("lock; cmpxchgq %2, %1\n"
|
||||
: "=a" (oldval), "+m" (*addr)
|
||||
: "r" (newval), "0" (oldval)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
return oldval;
|
||||
}
|
||||
|
||||
static inline unsigned long atomic_cmpxchg4(unsigned int *addr,
|
||||
unsigned int oldval,
|
||||
unsigned int newval)
|
||||
{
|
||||
asm volatile("lock; cmpxchgl %2, %1\n"
|
||||
: "=a" (oldval), "+m" (*addr)
|
||||
: "r" (newval), "0" (oldval)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
return oldval;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@ -22,19 +22,35 @@ struct x86_kregs {
|
||||
};
|
||||
|
||||
typedef struct x86_kregs ihk_mc_kernel_context_t;
|
||||
|
||||
/* XXX: User context should contain floating point registers */
|
||||
typedef struct x86_regs ihk_mc_user_context_t;
|
||||
struct x86_user_context {
|
||||
struct x86_sregs sr;
|
||||
|
||||
#define ihk_mc_syscall_arg0(uc) (uc)->rdi
|
||||
#define ihk_mc_syscall_arg1(uc) (uc)->rsi
|
||||
#define ihk_mc_syscall_arg2(uc) (uc)->rdx
|
||||
#define ihk_mc_syscall_arg3(uc) (uc)->r10
|
||||
#define ihk_mc_syscall_arg4(uc) (uc)->r8
|
||||
#define ihk_mc_syscall_arg5(uc) (uc)->r9
|
||||
/* 16-byte boundary here */
|
||||
uint8_t is_gpr_valid;
|
||||
uint8_t is_sr_valid;
|
||||
uint8_t spare_flags6;
|
||||
uint8_t spare_flags5;
|
||||
uint8_t spare_flags4;
|
||||
uint8_t spare_flags3;
|
||||
uint8_t spare_flags2;
|
||||
uint8_t spare_flags1;
|
||||
struct x86_basic_regs gpr; /* must be last */
|
||||
/* 16-byte boundary here */
|
||||
};
|
||||
typedef struct x86_user_context ihk_mc_user_context_t;
|
||||
|
||||
#define ihk_mc_syscall_ret(uc) (uc)->rax
|
||||
#define ihk_mc_syscall_arg0(uc) (uc)->gpr.rdi
|
||||
#define ihk_mc_syscall_arg1(uc) (uc)->gpr.rsi
|
||||
#define ihk_mc_syscall_arg2(uc) (uc)->gpr.rdx
|
||||
#define ihk_mc_syscall_arg3(uc) (uc)->gpr.r10
|
||||
#define ihk_mc_syscall_arg4(uc) (uc)->gpr.r8
|
||||
#define ihk_mc_syscall_arg5(uc) (uc)->gpr.r9
|
||||
|
||||
#define ihk_mc_syscall_pc(uc) (uc)->rip
|
||||
#define ihk_mc_syscall_sp(uc) (uc)->rsp
|
||||
#define ihk_mc_syscall_ret(uc) (uc)->gpr.rax
|
||||
|
||||
#define ihk_mc_syscall_pc(uc) (uc)->gpr.rip
|
||||
#define ihk_mc_syscall_sp(uc) (uc)->gpr.rsp
|
||||
|
||||
#endif
|
||||
|
||||
@ -31,9 +31,5 @@ typedef int64_t off_t;
|
||||
|
||||
#define NULL ((void *)0)
|
||||
|
||||
#define BITS_PER_LONG_SHIFT 6
|
||||
#define BITS_PER_LONG (1 << BITS_PER_LONG_SHIFT)
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
17
arch/x86/kernel/include/prctl.h
Normal file
17
arch/x86/kernel/include/prctl.h
Normal file
@ -0,0 +1,17 @@
|
||||
/**
|
||||
* \file prctl.h
|
||||
* License details are found in the file LICENSE.
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_PRCTL_H
|
||||
#define __ARCH_PRCTL_H
|
||||
|
||||
#define ARCH_SET_GS 0x1001
|
||||
#define ARCH_SET_FS 0x1002
|
||||
#define ARCH_GET_FS 0x1003
|
||||
#define ARCH_GET_GS 0x1004
|
||||
|
||||
#endif
|
||||
@ -6,6 +6,8 @@
|
||||
* Machine Specific Registers (MSR)
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
@ -16,7 +18,31 @@
|
||||
|
||||
#include <types.h>
|
||||
|
||||
#define RFLAGS_CF (1 << 0)
|
||||
#define RFLAGS_PF (1 << 2)
|
||||
#define RFLAGS_AF (1 << 4)
|
||||
#define RFLAGS_ZF (1 << 6)
|
||||
#define RFLAGS_SF (1 << 7)
|
||||
#define RFLAGS_TF (1 << 8)
|
||||
#define RFLAGS_IF (1 << 9)
|
||||
#define RFLAGS_DF (1 << 10)
|
||||
#define RFLAGS_OF (1 << 11)
|
||||
#define RFLAGS_IOPL (3 << 12)
|
||||
#define RFLAGS_NT (1 << 14)
|
||||
#define RFLAGS_RF (1 << 16)
|
||||
#define RFLAGS_VM (1 << 17)
|
||||
#define RFLAGS_AC (1 << 18)
|
||||
#define RFLAGS_VIF (1 << 19)
|
||||
#define RFLAGS_VIP (1 << 20)
|
||||
#define RFLAGS_ID (1 << 21)
|
||||
|
||||
#define DB6_B0 (1 << 0)
|
||||
#define DB6_B1 (1 << 1)
|
||||
#define DB6_B2 (1 << 2)
|
||||
#define DB6_B3 (1 << 3)
|
||||
#define DB6_BD (1 << 13)
|
||||
#define DB6_BS (1 << 14)
|
||||
#define DB6_BT (1 << 15)
|
||||
|
||||
#define MSR_EFER 0xc0000080
|
||||
#define MSR_STAR 0xc0000081
|
||||
@ -26,6 +52,14 @@
|
||||
#define MSR_GS_BASE 0xc0000101
|
||||
|
||||
#define MSR_IA32_APIC_BASE 0x000000001b
|
||||
#define MSR_PLATFORM_INFO 0x000000ce
|
||||
#define MSR_IA32_PERF_CTL 0x00000199
|
||||
#define MSR_IA32_MISC_ENABLE 0x000001a0
|
||||
#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
|
||||
#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad
|
||||
#define MSR_IA32_CR_PAT 0x00000277
|
||||
#define MSR_IA32_XSS 0xda0
|
||||
|
||||
|
||||
#define CVAL(event, mask) \
|
||||
((((event) & 0xf00) << 24) | ((mask) << 8) | ((event) & 0xff))
|
||||
@ -37,6 +71,25 @@
|
||||
#define MSR_PERF_CTL_0 0xc0010000
|
||||
#define MSR_PERF_CTR_0 0xc0010004
|
||||
|
||||
static unsigned long xgetbv(unsigned int index)
|
||||
{
|
||||
unsigned int low, high;
|
||||
|
||||
asm volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (index));
|
||||
|
||||
return low | ((unsigned long)high << 32);
|
||||
}
|
||||
|
||||
static void xsetbv(unsigned int index, unsigned long val)
|
||||
{
|
||||
unsigned int low, high;
|
||||
|
||||
low = val;
|
||||
high = val >> 32;
|
||||
|
||||
asm volatile("xsetbv" : : "a" (low), "d" (high), "c" (index));
|
||||
}
|
||||
|
||||
static void wrmsr(unsigned int idx, unsigned long value){
|
||||
unsigned int high, low;
|
||||
|
||||
@ -135,10 +188,19 @@ struct tss64 {
|
||||
unsigned short iomap_address;
|
||||
} __attribute__((packed));
|
||||
|
||||
struct x86_regs {
|
||||
unsigned long r15, r14, r13, r12, r11, r10, r9, r8;
|
||||
unsigned long rdi, rsi, rdx, rcx, rbx, rax, rbp;
|
||||
unsigned long error, rip, cs, rflags, rsp, ss;
|
||||
struct x86_basic_regs {
|
||||
unsigned long r15, r14, r13, r12, rbp, rbx, r11, r10;
|
||||
unsigned long r9, r8, rax, rcx, rdx, rsi, rdi, error;
|
||||
unsigned long rip, cs, rflags, rsp, ss;
|
||||
};
|
||||
|
||||
struct x86_sregs {
|
||||
unsigned long fs_base;
|
||||
unsigned long gs_base;
|
||||
unsigned long ds;
|
||||
unsigned long es;
|
||||
unsigned long fs;
|
||||
unsigned long gs;
|
||||
};
|
||||
|
||||
#define REGS_GET_STACK_POINTER(regs) (((struct x86_regs *)regs)->rsp)
|
||||
@ -162,7 +224,72 @@ enum x86_pf_error_code {
|
||||
PF_RSVD = 1 << 3,
|
||||
PF_INSTR = 1 << 4,
|
||||
|
||||
PF_PATCH = 1 << 29,
|
||||
PF_POPULATE = 1 << 30,
|
||||
};
|
||||
|
||||
struct i387_fxsave_struct {
|
||||
unsigned short cwd;
|
||||
unsigned short swd;
|
||||
unsigned short twd;
|
||||
unsigned short fop;
|
||||
union {
|
||||
struct {
|
||||
unsigned long rip;
|
||||
unsigned long rdp;
|
||||
};
|
||||
struct {
|
||||
unsigned int fip;
|
||||
unsigned int fcs;
|
||||
unsigned int foo;
|
||||
unsigned int fos;
|
||||
};
|
||||
};
|
||||
unsigned int mxcsr;
|
||||
unsigned int mxcsr_mask;
|
||||
unsigned int st_space[32];
|
||||
unsigned int xmm_space[64];
|
||||
unsigned int padding[12];
|
||||
union {
|
||||
unsigned int padding1[12];
|
||||
unsigned int sw_reserved[12];
|
||||
};
|
||||
|
||||
} __attribute__((aligned(16)));
|
||||
|
||||
struct ymmh_struct {
|
||||
unsigned int ymmh_space[64];
|
||||
};
|
||||
|
||||
struct lwp_struct {
|
||||
unsigned char reserved[128];
|
||||
};
|
||||
|
||||
struct bndreg {
|
||||
unsigned long lower_bound;
|
||||
unsigned long upper_bound;
|
||||
} __attribute__((packed));
|
||||
|
||||
struct bndcsr {
|
||||
unsigned long bndcfgu;
|
||||
unsigned long bndstatus;
|
||||
} __attribute__((packed));
|
||||
|
||||
struct xsave_hdr_struct {
|
||||
unsigned long xstate_bv;
|
||||
unsigned long xcomp_bv;
|
||||
unsigned long reserved[6];
|
||||
} __attribute__((packed));
|
||||
|
||||
struct xsave_struct {
|
||||
struct i387_fxsave_struct i387;
|
||||
struct xsave_hdr_struct xsave_hdr;
|
||||
struct ymmh_struct ymmh;
|
||||
struct lwp_struct lwp;
|
||||
struct bndreg bndreg[4];
|
||||
struct bndcsr bndcsr;
|
||||
} __attribute__ ((packed, aligned (64)));
|
||||
|
||||
typedef struct xsave_struct fp_regs_struct;
|
||||
|
||||
#endif
|
||||
|
||||
@ -90,10 +90,6 @@ enum __rlimit_resource
|
||||
#define RLIM_NLIMITS __RLIM_NLIMITS
|
||||
};
|
||||
|
||||
|
||||
struct rlimit {
|
||||
uint64_t rlim_cur; /* Soft limit */
|
||||
uint64_t rlim_max; /* Hard limit (ceiling for rlim_cur) */
|
||||
};
|
||||
#include <generic-rlimit.h>
|
||||
|
||||
#endif
|
||||
@ -20,12 +20,13 @@
|
||||
* syscall_name[] only, no handler exists.
|
||||
*/
|
||||
|
||||
SYSCALL_DELEGATED(0, read)
|
||||
SYSCALL_HANDLED(0, read)
|
||||
SYSCALL_DELEGATED(1, write)
|
||||
SYSCALL_DELEGATED(2, open)
|
||||
SYSCALL_DELEGATED(3, close)
|
||||
SYSCALL_HANDLED(3, close)
|
||||
SYSCALL_DELEGATED(4, stat)
|
||||
SYSCALL_DELEGATED(5, fstat)
|
||||
SYSCALL_DELEGATED(7, poll)
|
||||
SYSCALL_DELEGATED(8, lseek)
|
||||
SYSCALL_HANDLED(9, mmap)
|
||||
SYSCALL_HANDLED(10, mprotect)
|
||||
@ -34,15 +35,24 @@ SYSCALL_HANDLED(12, brk)
|
||||
SYSCALL_HANDLED(13, rt_sigaction)
|
||||
SYSCALL_HANDLED(14, rt_sigprocmask)
|
||||
SYSCALL_HANDLED(15, rt_sigreturn)
|
||||
SYSCALL_DELEGATED(16, ioctl)
|
||||
SYSCALL_HANDLED(16, ioctl)
|
||||
SYSCALL_DELEGATED(17, pread64)
|
||||
SYSCALL_DELEGATED(18, pwrite64)
|
||||
SYSCALL_DELEGATED(20, writev)
|
||||
SYSCALL_DELEGATED(21, access)
|
||||
SYSCALL_DELEGATED(23, select)
|
||||
SYSCALL_HANDLED(24, sched_yield)
|
||||
SYSCALL_HANDLED(25, mremap)
|
||||
SYSCALL_HANDLED(26, msync)
|
||||
SYSCALL_HANDLED(27, mincore)
|
||||
SYSCALL_HANDLED(28, madvise)
|
||||
SYSCALL_HANDLED(29, shmget)
|
||||
SYSCALL_HANDLED(30, shmat)
|
||||
SYSCALL_HANDLED(31, shmctl)
|
||||
SYSCALL_HANDLED(34, pause)
|
||||
SYSCALL_HANDLED(35, nanosleep)
|
||||
SYSCALL_HANDLED(36, getitimer)
|
||||
SYSCALL_HANDLED(38, setitimer)
|
||||
SYSCALL_HANDLED(39, getpid)
|
||||
SYSCALL_HANDLED(56, clone)
|
||||
SYSCALL_DELEGATED(57, fork)
|
||||
@ -52,47 +62,96 @@ SYSCALL_HANDLED(60, exit)
|
||||
SYSCALL_HANDLED(61, wait4)
|
||||
SYSCALL_HANDLED(62, kill)
|
||||
SYSCALL_DELEGATED(63, uname)
|
||||
SYSCALL_DELEGATED(72, fcntl)
|
||||
SYSCALL_DELEGATED(65, semop)
|
||||
SYSCALL_HANDLED(67, shmdt)
|
||||
SYSCALL_DELEGATED(69, msgsnd)
|
||||
SYSCALL_DELEGATED(70, msgrcv)
|
||||
SYSCALL_HANDLED(72, fcntl)
|
||||
SYSCALL_DELEGATED(79, getcwd)
|
||||
SYSCALL_DELEGATED(89, readlink)
|
||||
SYSCALL_DELEGATED(96, gettimeofday)
|
||||
SYSCALL_HANDLED(96, gettimeofday)
|
||||
SYSCALL_HANDLED(97, getrlimit)
|
||||
SYSCALL_HANDLED(98, getrusage)
|
||||
SYSCALL_HANDLED(100, times)
|
||||
SYSCALL_HANDLED(101, ptrace)
|
||||
SYSCALL_DELEGATED(102, getuid)
|
||||
SYSCALL_DELEGATED(104, getgid)
|
||||
SYSCALL_DELEGATED(107, geteuid)
|
||||
SYSCALL_DELEGATED(108, getegid)
|
||||
SYSCALL_HANDLED(102, getuid)
|
||||
SYSCALL_HANDLED(104, getgid)
|
||||
SYSCALL_HANDLED(105, setuid)
|
||||
SYSCALL_HANDLED(106, setgid)
|
||||
SYSCALL_HANDLED(107, geteuid)
|
||||
SYSCALL_HANDLED(108, getegid)
|
||||
SYSCALL_HANDLED(109, setpgid)
|
||||
SYSCALL_DELEGATED(110, getppid)
|
||||
SYSCALL_HANDLED(110, getppid)
|
||||
SYSCALL_DELEGATED(111, getpgrp)
|
||||
SYSCALL_HANDLED(113, setreuid)
|
||||
SYSCALL_HANDLED(114, setregid)
|
||||
SYSCALL_HANDLED(117, setresuid)
|
||||
SYSCALL_HANDLED(118, getresuid)
|
||||
SYSCALL_HANDLED(119, setresgid)
|
||||
SYSCALL_HANDLED(120, getresgid)
|
||||
SYSCALL_HANDLED(122, setfsuid)
|
||||
SYSCALL_HANDLED(123, setfsgid)
|
||||
SYSCALL_HANDLED(127, rt_sigpending)
|
||||
SYSCALL_HANDLED(128, rt_sigtimedwait)
|
||||
SYSCALL_HANDLED(129, rt_sigqueueinfo)
|
||||
SYSCALL_HANDLED(130, rt_sigsuspend)
|
||||
SYSCALL_HANDLED(131, sigaltstack)
|
||||
SYSCALL_HANDLED(142, sched_setparam)
|
||||
SYSCALL_HANDLED(143, sched_getparam)
|
||||
SYSCALL_HANDLED(144, sched_setscheduler)
|
||||
SYSCALL_HANDLED(145, sched_getscheduler)
|
||||
SYSCALL_HANDLED(146, sched_get_priority_max)
|
||||
SYSCALL_HANDLED(147, sched_get_priority_min)
|
||||
SYSCALL_HANDLED(148, sched_rr_get_interval)
|
||||
SYSCALL_HANDLED(149, mlock)
|
||||
SYSCALL_HANDLED(150, munlock)
|
||||
SYSCALL_HANDLED(151, mlockall)
|
||||
SYSCALL_HANDLED(152, munlockall)
|
||||
SYSCALL_HANDLED(158, arch_prctl)
|
||||
SYSCALL_HANDLED(160, setrlimit)
|
||||
SYSCALL_HANDLED(164, settimeofday)
|
||||
SYSCALL_HANDLED(186, gettid)
|
||||
SYSCALL_HANDLED(200, tkill)
|
||||
SYSCALL_DELEGATED(201, time)
|
||||
SYSCALL_HANDLED(202, futex)
|
||||
SYSCALL_HANDLED(203, sched_setaffinity)
|
||||
SYSCALL_HANDLED(204, sched_getaffinity)
|
||||
SYSCALL_DELEGATED(208, io_getevents)
|
||||
SYSCALL_HANDLED(216, remap_file_pages)
|
||||
SYSCALL_DELEGATED(217, getdents64)
|
||||
SYSCALL_HANDLED(218, set_tid_address)
|
||||
SYSCALL_DELEGATED(220, semtimedop)
|
||||
SYSCALL_HANDLED(228, clock_gettime)
|
||||
SYSCALL_DELEGATED(230, clock_nanosleep)
|
||||
SYSCALL_HANDLED(231, exit_group)
|
||||
SYSCALL_DELEGATED(232, epoll_wait)
|
||||
SYSCALL_HANDLED(234, tgkill)
|
||||
SYSCALL_HANDLED(237, mbind)
|
||||
SYSCALL_HANDLED(238, set_mempolicy)
|
||||
SYSCALL_HANDLED(239, get_mempolicy)
|
||||
SYSCALL_HANDLED(247, waitid)
|
||||
SYSCALL_HANDLED(256, migrate_pages)
|
||||
SYSCALL_DELEGATED(270, pselect6)
|
||||
SYSCALL_DELEGATED(271, ppoll)
|
||||
SYSCALL_HANDLED(273, set_robust_list)
|
||||
SYSCALL_HANDLED(279, move_pages)
|
||||
SYSCALL_DELEGATED(281, epoll_pwait)
|
||||
SYSCALL_HANDLED(282, signalfd)
|
||||
SYSCALL_HANDLED(289, signalfd4)
|
||||
SYSCALL_HANDLED(298, perf_event_open)
|
||||
#ifdef DCFA_KMOD
|
||||
SYSCALL_HANDLED(303, mod_call)
|
||||
#endif
|
||||
SYSCALL_HANDLED(309, getcpu)
|
||||
SYSCALL_HANDLED(310, process_vm_readv)
|
||||
SYSCALL_HANDLED(311, process_vm_writev)
|
||||
SYSCALL_HANDLED(601, pmc_init)
|
||||
SYSCALL_HANDLED(602, pmc_start)
|
||||
SYSCALL_HANDLED(603, pmc_stop)
|
||||
SYSCALL_HANDLED(604, pmc_reset)
|
||||
SYSCALL_HANDLED(700, get_cpu_id)
|
||||
#ifdef TRACK_SYSCALLS
|
||||
SYSCALL_HANDLED(701, syscall_offload_clr_cntrs)
|
||||
#endif // TRACK_SYSCALLS
|
||||
|
||||
/**** End of File ****/
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
* 2013/?? - bgerofi + shimosawa: handle rsp correctly for nested interrupts
|
||||
*/
|
||||
|
||||
#define X86_CPU_LOCAL_OFFSET_TSS 128
|
||||
#define X86_CPU_LOCAL_OFFSET_TSS 176
|
||||
#define X86_TSS_OFFSET_SP0 4
|
||||
#define X86_CPU_LOCAL_OFFSET_SP0 \
|
||||
(X86_CPU_LOCAL_OFFSET_TSS + X86_TSS_OFFSET_SP0)
|
||||
@ -24,39 +24,56 @@
|
||||
#define USER_CS (48 + 3)
|
||||
#define USER_DS (56 + 3)
|
||||
|
||||
#define PUSH_ALL_REGS \
|
||||
pushq %rbp; \
|
||||
pushq %rax; \
|
||||
pushq %rbx; \
|
||||
pushq %rcx; \
|
||||
pushq %rdx; \
|
||||
pushq %rsi; \
|
||||
pushq %rdi; \
|
||||
pushq %r8; \
|
||||
pushq %r9; \
|
||||
pushq %r10; \
|
||||
pushq %r11; \
|
||||
pushq %r12; \
|
||||
pushq %r13; \
|
||||
pushq %r14; \
|
||||
pushq %r15;
|
||||
#define POP_ALL_REGS \
|
||||
popq %r15; \
|
||||
popq %r14; \
|
||||
popq %r13; \
|
||||
popq %r12; \
|
||||
popq %r11; \
|
||||
popq %r10; \
|
||||
popq %r9; \
|
||||
popq %r8; \
|
||||
popq %rdi; \
|
||||
popq %rsi; \
|
||||
popq %rdx; \
|
||||
popq %rcx; \
|
||||
popq %rbx; \
|
||||
popq %rax; \
|
||||
popq %rbp
|
||||
|
||||
/* struct x86_user_context */
|
||||
#define X86_SREGS_BASE (0)
|
||||
#define X86_SREGS_SIZE 48
|
||||
|
||||
#define X86_FLAGS_BASE (X86_SREGS_BASE + X86_SREGS_SIZE)
|
||||
#define X86_FLAGS_SIZE 8
|
||||
|
||||
#define X86_REGS_BASE (X86_FLAGS_BASE + X86_FLAGS_SIZE)
|
||||
#define RAX_OFFSET (X86_REGS_BASE + 80)
|
||||
#define ERROR_OFFSET (X86_REGS_BASE + 120)
|
||||
#define RSP_OFFSET (X86_REGS_BASE + 152)
|
||||
|
||||
#define PUSH_ALL_REGS \
|
||||
pushq %rdi; \
|
||||
pushq %rsi; \
|
||||
pushq %rdx; \
|
||||
pushq %rcx; \
|
||||
pushq %rax; \
|
||||
pushq %r8; \
|
||||
pushq %r9; \
|
||||
pushq %r10; \
|
||||
pushq %r11; \
|
||||
pushq %rbx; \
|
||||
pushq %rbp; \
|
||||
pushq %r12; \
|
||||
pushq %r13; \
|
||||
pushq %r14; \
|
||||
pushq %r15; \
|
||||
pushq $1; /* is_gpr_valid is set, and others are cleared */ \
|
||||
subq $X86_FLAGS_BASE,%rsp /* for x86_sregs, etc. */
|
||||
|
||||
#define POP_ALL_REGS \
|
||||
movq $0,X86_FLAGS_BASE(%rsp); /* clear all flags */ \
|
||||
addq $X86_REGS_BASE,%rsp; /* discard x86_sregs, flags, etc. */ \
|
||||
popq %r15; \
|
||||
popq %r14; \
|
||||
popq %r13; \
|
||||
popq %r12; \
|
||||
popq %rbp; \
|
||||
popq %rbx; \
|
||||
popq %r11; \
|
||||
popq %r10; \
|
||||
popq %r9; \
|
||||
popq %r8; \
|
||||
popq %rax; \
|
||||
popq %rcx; \
|
||||
popq %rdx; \
|
||||
popq %rsi; \
|
||||
popq %rdi
|
||||
|
||||
.data
|
||||
.globl generic_common_handlers
|
||||
generic_common_handlers:
|
||||
@ -75,7 +92,7 @@ vector=vector+1
|
||||
|
||||
common_interrupt:
|
||||
PUSH_ALL_REGS
|
||||
movq 120(%rsp), %rdi
|
||||
movq ERROR_OFFSET(%rsp), %rdi
|
||||
movq %rsp, %rsi
|
||||
call handle_interrupt /* Enter C code */
|
||||
POP_ALL_REGS
|
||||
@ -91,7 +108,7 @@ page_fault:
|
||||
cld
|
||||
PUSH_ALL_REGS
|
||||
movq %cr2, %rdi
|
||||
movq 120(%rsp),%rsi
|
||||
movq ERROR_OFFSET(%rsp),%rsi
|
||||
movq %rsp,%rdx
|
||||
movq __page_fault_handler_address(%rip), %rax
|
||||
andq %rax, %rax
|
||||
@ -113,10 +130,53 @@ general_protection_exception:
|
||||
addq $8, %rsp
|
||||
iretq
|
||||
|
||||
.globl nmi
|
||||
nmi:
|
||||
#define PANICED 232
|
||||
#define PANIC_REGS 240
|
||||
movq %rax,%gs:PANIC_REGS+0x00
|
||||
movq %rbx,%gs:PANIC_REGS+0x08
|
||||
movq %rcx,%gs:PANIC_REGS+0x10
|
||||
movq %rdx,%gs:PANIC_REGS+0x18
|
||||
movq %rsi,%gs:PANIC_REGS+0x20
|
||||
movq %rdi,%gs:PANIC_REGS+0x28
|
||||
movq %rbp,%gs:PANIC_REGS+0x30
|
||||
movq 0x18(%rsp),%rax /* rsp */
|
||||
movq %rax,%gs:PANIC_REGS+0x38
|
||||
movq %r8, %gs:PANIC_REGS+0x40
|
||||
movq %r9, %gs:PANIC_REGS+0x48
|
||||
movq %r10,%gs:PANIC_REGS+0x50
|
||||
movq %r11,%gs:PANIC_REGS+0x58
|
||||
movq %r12,%gs:PANIC_REGS+0x60
|
||||
movq %r13,%gs:PANIC_REGS+0x68
|
||||
movq %r14,%gs:PANIC_REGS+0x70
|
||||
movq %r15,%gs:PANIC_REGS+0x78
|
||||
movq 0x00(%rsp),%rax /* rip */
|
||||
movq %rax,%gs:PANIC_REGS+0x80
|
||||
movq 0x10(%rsp),%rax /* rflags */
|
||||
movl %eax,%gs:PANIC_REGS+0x88
|
||||
movq 0x08(%rsp),%rax /* cs */
|
||||
movl %eax,%gs:PANIC_REGS+0x8C
|
||||
movq 0x20(%rsp),%rax /* ss */
|
||||
movl %eax,%gs:PANIC_REGS+0x90
|
||||
xorq %rax,%rax
|
||||
movw %ds,%ax
|
||||
movl %eax,%gs:PANIC_REGS+0x94
|
||||
movw %es,%ax
|
||||
movl %eax,%gs:PANIC_REGS+0x98
|
||||
movw %fs,%ax
|
||||
movl %eax,%gs:PANIC_REGS+0x9C
|
||||
movw %gs,%ax
|
||||
movl %eax,%gs:PANIC_REGS+0xA0
|
||||
movq $1,%gs:PANICED
|
||||
1:
|
||||
hlt
|
||||
jmp 1b
|
||||
|
||||
.globl x86_syscall
|
||||
x86_syscall:
|
||||
cld
|
||||
movq %rsp, %gs:24
|
||||
movq %rsp, %gs:X86_CPU_LOCAL_OFFSET_USTACK
|
||||
movq %gs:(X86_CPU_LOCAL_OFFSET_SP0), %rsp
|
||||
|
||||
pushq $(USER_DS)
|
||||
@ -124,21 +184,19 @@ x86_syscall:
|
||||
pushq %r11
|
||||
pushq $(USER_CS)
|
||||
pushq %rcx
|
||||
pushq $0
|
||||
movq %gs:24, %rcx
|
||||
movq %rcx, 32(%rsp)
|
||||
pushq %rax /* error code (= system call number) */
|
||||
PUSH_ALL_REGS
|
||||
movq 104(%rsp), %rdi
|
||||
movq %gs:X86_CPU_LOCAL_OFFSET_USTACK, %rcx
|
||||
movq %rcx, RSP_OFFSET(%rsp)
|
||||
movq RAX_OFFSET(%rsp), %rdi
|
||||
movw %ss, %ax
|
||||
movw %ax, %ds
|
||||
movq %rsp, %rsi
|
||||
callq *__x86_syscall_handler(%rip)
|
||||
1:
|
||||
movq %rax, 104(%rsp)
|
||||
movq %rax, RAX_OFFSET(%rsp)
|
||||
POP_ALL_REGS
|
||||
#ifdef USE_SYSRET
|
||||
movq 8(%rsp), %rcx
|
||||
movq 24(%rsp), %r11
|
||||
movq 32(%rsp), %rsp
|
||||
sysretq
|
||||
#else
|
||||
@ -147,7 +205,35 @@ x86_syscall:
|
||||
#endif
|
||||
|
||||
.globl enter_user_mode
|
||||
enter_user_mode:
|
||||
enter_user_mode:
|
||||
callq release_runq_lock
|
||||
movq $0, %rdi
|
||||
movq %rsp, %rsi
|
||||
call check_signal
|
||||
movq $0, %rdi
|
||||
call set_cputime
|
||||
POP_ALL_REGS
|
||||
addq $8, %rsp
|
||||
iretq
|
||||
|
||||
.globl debug_exception
|
||||
debug_exception:
|
||||
cld
|
||||
pushq $0 /* error */
|
||||
PUSH_ALL_REGS
|
||||
movq %rsp, %rdi
|
||||
call debug_handler
|
||||
POP_ALL_REGS
|
||||
addq $8, %rsp
|
||||
iretq
|
||||
|
||||
.globl int3_exception
|
||||
int3_exception:
|
||||
cld
|
||||
pushq $0 /* error */
|
||||
PUSH_ALL_REGS
|
||||
movq %rsp, %rdi
|
||||
call int3_handler
|
||||
POP_ALL_REGS
|
||||
addq $8, %rsp
|
||||
iretq
|
||||
|
||||
@ -6,6 +6,8 @@
|
||||
* resides in memory.
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
@ -19,26 +21,37 @@
|
||||
#include <registers.h>
|
||||
#include <string.h>
|
||||
|
||||
#define LOCALS_SPAN (4 * PAGE_SIZE)
|
||||
|
||||
struct x86_cpu_local_variables *locals;
|
||||
size_t x86_cpu_local_variables_span = LOCALS_SPAN; /* for debugger */
|
||||
|
||||
void init_processors_local(int max_id)
|
||||
{
|
||||
size_t size;
|
||||
|
||||
size = LOCALS_SPAN * max_id;
|
||||
/* Is contiguous allocating adequate?? */
|
||||
locals = ihk_mc_alloc_pages(max_id, IHK_MC_AP_CRITICAL);
|
||||
memset(locals, 0, PAGE_SIZE * max_id);
|
||||
locals = ihk_mc_alloc_pages(size/PAGE_SIZE, IHK_MC_AP_CRITICAL);
|
||||
memset(locals, 0, size);
|
||||
|
||||
kprintf("locals = %p\n", locals);
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires \valid(id);
|
||||
@ ensures \result == locals + (LOCALS_SPAN * id);
|
||||
@ assigns \nothing;
|
||||
@*/
|
||||
struct x86_cpu_local_variables *get_x86_cpu_local_variable(int id)
|
||||
{
|
||||
return (struct x86_cpu_local_variables *)
|
||||
((char *)locals + (id << PAGE_SHIFT));
|
||||
((char *)locals + (LOCALS_SPAN * id));
|
||||
}
|
||||
|
||||
static void *get_x86_cpu_local_kstack(int id)
|
||||
{
|
||||
return ((char *)locals + ((id + 1) << PAGE_SHIFT));
|
||||
return ((char *)locals + (LOCALS_SPAN * (id + 1)));
|
||||
}
|
||||
|
||||
struct x86_cpu_local_variables *get_x86_this_cpu_local(void)
|
||||
@ -80,7 +93,20 @@ void assign_processor_id(void)
|
||||
v->processor_id = id;
|
||||
}
|
||||
|
||||
void init_boot_processor_local(void)
|
||||
{
|
||||
static struct x86_cpu_local_variables avar;
|
||||
|
||||
memset(&avar, -1, sizeof(avar));
|
||||
set_gs_base(&avar);
|
||||
return;
|
||||
}
|
||||
|
||||
/** IHK **/
|
||||
/*@
|
||||
@ ensures \result == %gs;
|
||||
@ assigns \nothing;
|
||||
*/
|
||||
int ihk_mc_get_processor_id(void)
|
||||
{
|
||||
int id;
|
||||
@ -90,6 +116,10 @@ int ihk_mc_get_processor_id(void)
|
||||
return id;
|
||||
}
|
||||
|
||||
/*@
|
||||
@ ensures \result == (locals + (LOCALS_SPAN * %gs))->apic_id;
|
||||
@ assigns \nothing;
|
||||
*/
|
||||
int ihk_mc_get_hardware_processor_id(void)
|
||||
{
|
||||
struct x86_cpu_local_variables *v = get_x86_this_cpu_local();
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -16,6 +16,7 @@
|
||||
#include <memory.h>
|
||||
#include <string.h>
|
||||
|
||||
extern int num_processors;
|
||||
extern void arch_set_mikc_queue(void *r, void *w);
|
||||
ihk_ikc_ph_t arch_master_channel_packet_handler;
|
||||
|
||||
@ -23,22 +24,28 @@ int ihk_mc_ikc_init_first_local(struct ihk_ikc_channel_desc *channel,
|
||||
ihk_ikc_ph_t packet_handler)
|
||||
{
|
||||
struct ihk_ikc_queue_head *rq, *wq;
|
||||
size_t mikc_queue_pages;
|
||||
|
||||
ihk_ikc_system_init(NULL);
|
||||
|
||||
memset(channel, 0, sizeof(struct ihk_ikc_channel_desc));
|
||||
|
||||
/* Place both sides in this side */
|
||||
rq = arch_alloc_page(IHK_MC_AP_CRITICAL);
|
||||
wq = arch_alloc_page(IHK_MC_AP_CRITICAL);
|
||||
mikc_queue_pages = ((num_processors * MASTER_IKCQ_PKTSIZE)
|
||||
+ (PAGE_SIZE - 1)) / PAGE_SIZE;
|
||||
|
||||
ihk_ikc_init_queue(rq, 0, 0, PAGE_SIZE, MASTER_IKCQ_PKTSIZE);
|
||||
ihk_ikc_init_queue(wq, 0, 0, PAGE_SIZE, MASTER_IKCQ_PKTSIZE);
|
||||
/* Place both sides in this side */
|
||||
rq = ihk_mc_alloc_pages(mikc_queue_pages, IHK_MC_AP_CRITICAL);
|
||||
wq = ihk_mc_alloc_pages(mikc_queue_pages, IHK_MC_AP_CRITICAL);
|
||||
|
||||
ihk_ikc_init_queue(rq, 0, 0,
|
||||
mikc_queue_pages * PAGE_SIZE, MASTER_IKCQ_PKTSIZE);
|
||||
ihk_ikc_init_queue(wq, 0, 0,
|
||||
mikc_queue_pages * PAGE_SIZE, MASTER_IKCQ_PKTSIZE);
|
||||
|
||||
arch_master_channel_packet_handler = packet_handler;
|
||||
|
||||
ihk_ikc_init_desc(channel, IKC_OS_HOST, 0, rq, wq,
|
||||
ihk_ikc_master_channel_packet_handler);
|
||||
ihk_ikc_master_channel_packet_handler, channel);
|
||||
ihk_ikc_enable_channel(channel);
|
||||
|
||||
/* Set boot parameter */
|
||||
|
||||
@ -12,19 +12,72 @@
|
||||
#include <errno.h>
|
||||
#include <ihk/debug.h>
|
||||
#include <registers.h>
|
||||
#include <mc_perf_event.h>
|
||||
|
||||
extern unsigned int *x86_march_perfmap;
|
||||
extern int running_on_kvm(void);
|
||||
|
||||
#define X86_CR4_PCE 0x00000100
|
||||
|
||||
int perf_counters_discovered = 0;
|
||||
int X86_IA32_NUM_PERF_COUNTERS = 0;
|
||||
unsigned long X86_IA32_PERF_COUNTERS_MASK = 0;
|
||||
int X86_IA32_NUM_FIXED_PERF_COUNTERS = 0;
|
||||
unsigned long X86_IA32_FIXED_PERF_COUNTERS_MASK = 0;
|
||||
|
||||
void x86_init_perfctr(void)
|
||||
{
|
||||
int i = 0;
|
||||
unsigned long reg;
|
||||
unsigned long value = 0;
|
||||
uint64_t op;
|
||||
uint64_t eax;
|
||||
uint64_t ebx;
|
||||
uint64_t ecx;
|
||||
uint64_t edx;
|
||||
|
||||
/* Do not do it on KVM */
|
||||
if (running_on_kvm()) return;
|
||||
|
||||
/* Allow PMC to be read from user space */
|
||||
asm volatile("movq %%cr4, %0" : "=r"(reg));
|
||||
reg |= X86_CR4_PCE;
|
||||
asm volatile("movq %0, %%cr4" : : "r"(reg));
|
||||
|
||||
/* Detect number of supported performance counters */
|
||||
if (!perf_counters_discovered) {
|
||||
/* See Table 35.2 - Architectural MSRs in Vol 3C */
|
||||
op = 0x0a;
|
||||
asm volatile("cpuid" : "=a"(eax),"=b"(ebx),"=c"(ecx),"=d"(edx):"a"(op));
|
||||
|
||||
X86_IA32_NUM_PERF_COUNTERS = ((eax & 0xFF00) >> 8);
|
||||
X86_IA32_PERF_COUNTERS_MASK = (1 << X86_IA32_NUM_PERF_COUNTERS) - 1;
|
||||
|
||||
X86_IA32_NUM_FIXED_PERF_COUNTERS = (edx & 0x0F);
|
||||
X86_IA32_FIXED_PERF_COUNTERS_MASK =
|
||||
((1UL << X86_IA32_NUM_FIXED_PERF_COUNTERS) - 1) <<
|
||||
X86_IA32_BASE_FIXED_PERF_COUNTERS;
|
||||
|
||||
perf_counters_discovered = 1;
|
||||
kprintf("X86_IA32_NUM_PERF_COUNTERS: %d, X86_IA32_NUM_FIXED_PERF_COUNTERS: %d\n",
|
||||
X86_IA32_NUM_PERF_COUNTERS, X86_IA32_NUM_FIXED_PERF_COUNTERS);
|
||||
}
|
||||
|
||||
/* Clear Fixed Counter Control */
|
||||
value = rdmsr(MSR_PERF_FIXED_CTRL);
|
||||
value &= 0xfffffffffffff000L;
|
||||
wrmsr(MSR_PERF_FIXED_CTRL, value);
|
||||
|
||||
/* Clear Generic Counter Control */
|
||||
for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
|
||||
wrmsr(MSR_IA32_PERFEVTSEL0 + i, 0);
|
||||
}
|
||||
|
||||
/* Enable PMC Control */
|
||||
value = rdmsr(MSR_PERF_GLOBAL_CTRL);
|
||||
value |= X86_IA32_PERF_COUNTERS_MASK;
|
||||
value |= X86_IA32_FIXED_PERF_COUNTERS_MASK;
|
||||
wrmsr(MSR_PERF_GLOBAL_CTRL, value);
|
||||
}
|
||||
|
||||
static int set_perfctr_x86_direct(int counter, int mode, unsigned int value)
|
||||
@ -33,20 +86,53 @@ static int set_perfctr_x86_direct(int counter, int mode, unsigned int value)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (mode & PERFCTR_USER_MODE) {
|
||||
// clear mode flags
|
||||
value &= ~(3 << 16);
|
||||
|
||||
// set mode flags
|
||||
if(mode & PERFCTR_USER_MODE) {
|
||||
value |= 1 << 16;
|
||||
}
|
||||
if (mode & PERFCTR_KERNEL_MODE) {
|
||||
}
|
||||
if(mode & PERFCTR_KERNEL_MODE) {
|
||||
value |= 1 << 17;
|
||||
}
|
||||
}
|
||||
|
||||
// wrmsr(MSR_PERF_GLOBAL_CTRL, 0);
|
||||
|
||||
value |= (1 << 22) | (1 << 18); /* EN */
|
||||
value |= (1 << 20); /* Enable overflow interrupt */
|
||||
|
||||
wrmsr(MSR_IA32_PERFEVTSEL0 + counter, value);
|
||||
|
||||
kprintf("wrmsr: %d <= %x\n", MSR_PERF_GLOBAL_CTRL, 0);
|
||||
kprintf("wrmsr: %d <= %x\n", MSR_IA32_PERFEVTSEL0 + counter, value);
|
||||
//kprintf("wrmsr: %d <= %x\n", MSR_PERF_GLOBAL_CTRL, 0);
|
||||
//kprintf("wrmsr: %d <= %x\n", MSR_IA32_PERFEVTSEL0 + counter, value);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_pmc_x86_direct(int counter, long val)
|
||||
{
|
||||
unsigned long cnt_bit = 0;
|
||||
|
||||
if (counter < 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
val &= 0x000000ffffffffff; // 40bit Mask
|
||||
|
||||
cnt_bit = 1UL << counter;
|
||||
if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) {
|
||||
// set generic pmc
|
||||
wrmsr(MSR_IA32_PMC0 + counter, val);
|
||||
}
|
||||
else if ( cnt_bit & X86_IA32_FIXED_PERF_COUNTERS_MASK ) {
|
||||
// set fixed pmc
|
||||
wrmsr(MSR_IA32_FIXED_CTR0 + counter - X86_IA32_BASE_FIXED_PERF_COUNTERS, val);
|
||||
}
|
||||
else {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -57,6 +143,45 @@ static int set_perfctr_x86(int counter, int event, int mask, int inv, int count,
|
||||
CVAL2(event, mask, inv, count));
|
||||
}
|
||||
|
||||
static int set_fixed_counter(int counter, int mode)
|
||||
{
|
||||
unsigned long value = 0;
|
||||
unsigned int ctr_mask = 0xf;
|
||||
int counter_idx = counter - X86_IA32_BASE_FIXED_PERF_COUNTERS ;
|
||||
unsigned int set_val = 0;
|
||||
|
||||
if (counter_idx < 0 || counter_idx >= X86_IA32_NUM_FIXED_PERF_COUNTERS) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
// clear specified fixed counter info
|
||||
value = rdmsr(MSR_PERF_FIXED_CTRL);
|
||||
ctr_mask <<= counter_idx * 4;
|
||||
value &= ~ctr_mask;
|
||||
|
||||
if (mode & PERFCTR_USER_MODE) {
|
||||
set_val |= 1 << 1;
|
||||
}
|
||||
if (mode & PERFCTR_KERNEL_MODE) {
|
||||
set_val |= 1;
|
||||
}
|
||||
|
||||
set_val <<= counter_idx * 4;
|
||||
value |= set_val;
|
||||
|
||||
wrmsr(MSR_PERF_FIXED_CTRL, value);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ihk_mc_perfctr_init_raw(int counter, unsigned int code, int mode)
|
||||
{
|
||||
if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return set_perfctr_x86_direct(counter, mode, code);
|
||||
}
|
||||
int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode)
|
||||
{
|
||||
if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) {
|
||||
@ -78,14 +203,15 @@ extern void x86_march_perfctr_start(unsigned long counter_mask);
|
||||
|
||||
int ihk_mc_perfctr_start(unsigned long counter_mask)
|
||||
{
|
||||
unsigned int value = 0;
|
||||
unsigned long value = 0;
|
||||
unsigned long mask = X86_IA32_PERF_COUNTERS_MASK | X86_IA32_FIXED_PERF_COUNTERS_MASK;
|
||||
|
||||
#ifdef HAVE_MARCH_PERFCTR_START
|
||||
x86_march_perfctr_start(counter_mask);
|
||||
#endif
|
||||
counter_mask &= ((1 << X86_IA32_NUM_PERF_COUNTERS) - 1);
|
||||
counter_mask &= mask;
|
||||
value = rdmsr(MSR_PERF_GLOBAL_CTRL);
|
||||
value |= counter_mask;
|
||||
value |= counter_mask;
|
||||
wrmsr(MSR_PERF_GLOBAL_CTRL, value);
|
||||
|
||||
return 0;
|
||||
@ -93,25 +219,78 @@ int ihk_mc_perfctr_start(unsigned long counter_mask)
|
||||
|
||||
int ihk_mc_perfctr_stop(unsigned long counter_mask)
|
||||
{
|
||||
unsigned int value;
|
||||
unsigned long value;
|
||||
unsigned long mask = X86_IA32_PERF_COUNTERS_MASK | X86_IA32_FIXED_PERF_COUNTERS_MASK;
|
||||
|
||||
counter_mask &= ((1 << X86_IA32_NUM_PERF_COUNTERS) - 1);
|
||||
counter_mask &= mask;
|
||||
value = rdmsr(MSR_PERF_GLOBAL_CTRL);
|
||||
value &= ~counter_mask;
|
||||
wrmsr(MSR_PERF_GLOBAL_CTRL, value);
|
||||
|
||||
if(counter_mask >> 32 & 0x1) {
|
||||
value = rdmsr(MSR_PERF_FIXED_CTRL);
|
||||
value &= ~(0xf);
|
||||
wrmsr(MSR_PERF_FIXED_CTRL, value);
|
||||
}
|
||||
|
||||
if(counter_mask >> 32 & 0x2) {
|
||||
value = rdmsr(MSR_PERF_FIXED_CTRL);
|
||||
value &= ~(0xf << 4);
|
||||
wrmsr(MSR_PERF_FIXED_CTRL, value);
|
||||
}
|
||||
|
||||
if(counter_mask >> 32 & 0x4) {
|
||||
value = rdmsr(MSR_PERF_FIXED_CTRL);
|
||||
value &= ~(0xf << 8);
|
||||
wrmsr(MSR_PERF_FIXED_CTRL, value);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// init for fixed counter
|
||||
int ihk_mc_perfctr_fixed_init(int counter, int mode)
|
||||
{
|
||||
unsigned long value = 0;
|
||||
unsigned int ctr_mask = 0xf;
|
||||
int counter_idx = counter - X86_IA32_BASE_FIXED_PERF_COUNTERS ;
|
||||
unsigned int set_val = 0;
|
||||
|
||||
if (counter_idx < 0 || counter_idx >= X86_IA32_NUM_FIXED_PERF_COUNTERS) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
// clear specified fixed counter info
|
||||
value = rdmsr(MSR_PERF_FIXED_CTRL);
|
||||
ctr_mask <<= counter_idx * 4;
|
||||
value &= ~ctr_mask;
|
||||
|
||||
if (mode & PERFCTR_USER_MODE) {
|
||||
set_val |= 1 << 1;
|
||||
}
|
||||
if (mode & PERFCTR_KERNEL_MODE) {
|
||||
set_val |= 1;
|
||||
}
|
||||
|
||||
// enable PMI on overflow
|
||||
set_val |= 1 << 3;
|
||||
|
||||
set_val <<= counter_idx * 4;
|
||||
value |= set_val;
|
||||
|
||||
wrmsr(MSR_PERF_FIXED_CTRL, value);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ihk_mc_perfctr_reset(int counter)
|
||||
{
|
||||
if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) {
|
||||
return -EINVAL;
|
||||
}
|
||||
return set_pmc_x86_direct(counter, 0);
|
||||
}
|
||||
|
||||
wrmsr(MSR_IA32_PMC0 + counter, 0);
|
||||
|
||||
return 0;
|
||||
int ihk_mc_perfctr_set(int counter, long val)
|
||||
{
|
||||
return set_pmc_x86_direct(counter, val);
|
||||
}
|
||||
|
||||
int ihk_mc_perfctr_read_mask(unsigned long counter_mask, unsigned long *value)
|
||||
@ -129,10 +308,87 @@ int ihk_mc_perfctr_read_mask(unsigned long counter_mask, unsigned long *value)
|
||||
|
||||
unsigned long ihk_mc_perfctr_read(int counter)
|
||||
{
|
||||
if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) {
|
||||
unsigned long retval = 0;
|
||||
unsigned long cnt_bit = 0;
|
||||
|
||||
if (counter < 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return rdpmc(counter);
|
||||
cnt_bit = 1UL << counter;
|
||||
|
||||
if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) {
|
||||
// read generic pmc
|
||||
retval = rdpmc(counter);
|
||||
}
|
||||
else if ( cnt_bit & X86_IA32_FIXED_PERF_COUNTERS_MASK ) {
|
||||
// read fixed pmc
|
||||
retval = rdpmc((1 << 30) + (counter - X86_IA32_BASE_FIXED_PERF_COUNTERS));
|
||||
}
|
||||
else {
|
||||
retval = -EINVAL;
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
// read by rdmsr
|
||||
unsigned long ihk_mc_perfctr_read_msr(int counter)
|
||||
{
|
||||
unsigned int idx = 0;
|
||||
unsigned long retval = 0;
|
||||
unsigned long cnt_bit = 0;
|
||||
|
||||
if (counter < 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
cnt_bit = 1UL << counter;
|
||||
|
||||
if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) {
|
||||
// read generic pmc
|
||||
idx = MSR_IA32_PMC0 + counter;
|
||||
retval = (unsigned long) rdmsr(idx);
|
||||
}
|
||||
else if ( cnt_bit & X86_IA32_FIXED_PERF_COUNTERS_MASK ) {
|
||||
// read fixed pmc
|
||||
idx = MSR_IA32_FIXED_CTR0 + counter;
|
||||
retval = (unsigned long) rdmsr(idx);
|
||||
}
|
||||
else {
|
||||
retval = -EINVAL;
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
int ihk_mc_perfctr_alloc_counter(unsigned int *type, unsigned long *config, unsigned long pmc_status)
|
||||
{
|
||||
int ret = -1;
|
||||
int i = 0;
|
||||
|
||||
if(*type == PERF_TYPE_HARDWARE) {
|
||||
switch(*config){
|
||||
case PERF_COUNT_HW_INSTRUCTIONS :
|
||||
*type = PERF_TYPE_RAW;
|
||||
*config = 0x5300c0;
|
||||
break;
|
||||
default :
|
||||
// Unexpected config
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else if(*type != PERF_TYPE_RAW) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// find avail generic counter
|
||||
for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
|
||||
if(!(pmc_status & (1 << i))) {
|
||||
ret = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -5,6 +5,8 @@
|
||||
* implements x86's vsyscall
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2014 Hitachi, Ltd.
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
@ -16,20 +18,93 @@
|
||||
*/
|
||||
|
||||
#include <syscall.h>
|
||||
#include <ihk/atomic.h>
|
||||
#include <arch/cpu.h>
|
||||
|
||||
extern int vsyscall_gettimeofday(void *tv, void *tz)
|
||||
extern int vsyscall_gettimeofday(struct timeval *tv, void *tz)
|
||||
__attribute__ ((section (".vsyscall.gettimeofday")));
|
||||
|
||||
int vsyscall_gettimeofday(void *tv, void *tz)
|
||||
struct tod_data_s tod_data
|
||||
__attribute__ ((section(".vsyscall.gettimeofday.data"))) = {
|
||||
.do_local = 0,
|
||||
.version = IHK_ATOMIC64_INIT(0),
|
||||
};
|
||||
|
||||
static inline void cpu_pause_for_vsyscall(void)
|
||||
{
|
||||
asm volatile ("pause" ::: "memory");
|
||||
return;
|
||||
} /* cpu_pause_for_vsyscall() */
|
||||
|
||||
static inline void calculate_time_from_tsc(struct timespec *ts)
|
||||
{
|
||||
long ver;
|
||||
unsigned long current_tsc;
|
||||
__time_t sec_delta;
|
||||
long ns_delta;
|
||||
|
||||
for (;;) {
|
||||
while ((ver = ihk_atomic64_read(&tod_data.version)) & 1) {
|
||||
/* settimeofday() is in progress */
|
||||
cpu_pause_for_vsyscall();
|
||||
}
|
||||
rmb();
|
||||
*ts = tod_data.origin;
|
||||
rmb();
|
||||
if (ver == ihk_atomic64_read(&tod_data.version)) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* settimeofday() has intervened */
|
||||
cpu_pause_for_vsyscall();
|
||||
}
|
||||
|
||||
current_tsc = rdtsc();
|
||||
sec_delta = current_tsc / tod_data.clocks_per_sec;
|
||||
ns_delta = NS_PER_SEC * (current_tsc % tod_data.clocks_per_sec)
|
||||
/ tod_data.clocks_per_sec;
|
||||
/* calc. of ns_delta overflows if clocks_per_sec exceeds 18.44 GHz */
|
||||
|
||||
ts->tv_sec += sec_delta;
|
||||
ts->tv_nsec += ns_delta;
|
||||
if (ts->tv_nsec >= NS_PER_SEC) {
|
||||
ts->tv_nsec -= NS_PER_SEC;
|
||||
++ts->tv_sec;
|
||||
}
|
||||
|
||||
return;
|
||||
} /* calculate_time_from_tsc() */
|
||||
|
||||
int vsyscall_gettimeofday(struct timeval *tv, void *tz)
|
||||
{
|
||||
int error;
|
||||
struct timespec ats;
|
||||
|
||||
if (!tv && !tz) {
|
||||
/* nothing to do */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Do it locally if supported */
|
||||
if (!tz && tod_data.do_local) {
|
||||
calculate_time_from_tsc(&ats);
|
||||
|
||||
tv->tv_sec = ats.tv_sec;
|
||||
tv->tv_usec = ats.tv_nsec / 1000;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Otherwise syscall */
|
||||
asm ("syscall" : "=a" (error)
|
||||
: "a" (__NR_gettimeofday), "D" (tv), "S" (tz)
|
||||
: "%rcx", "%r11", "memory");
|
||||
|
||||
if (error) {
|
||||
*(int *)0 = 0; /* i.e. raise(SIGSEGV) */
|
||||
}
|
||||
return error;
|
||||
}
|
||||
} /* vsyscall_gettimeofday() */
|
||||
|
||||
extern long vsyscall_time(void *tp)
|
||||
__attribute__ ((section (".vsyscall.time")));
|
||||
@ -58,3 +133,17 @@ long vsyscall_time(void *tp)
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
extern int vsyscall_getcpu(unsigned *cpup, unsigned *nodep, void *tcachep)
|
||||
__attribute__ ((section (".vsyscall.getcpu")));
|
||||
|
||||
int vsyscall_getcpu(unsigned *cpup, unsigned *nodep, void *tcachep)
|
||||
{
|
||||
int error;
|
||||
|
||||
asm ("syscall" : "=a" (error)
|
||||
: "a" (__NR_getcpu), "D" (cpup), "S" (nodep), "d" (tcachep)
|
||||
: "%rcx", "%r11", "memory");
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
28
arch/x86/tools/irqbalance_mck.in.in
Normal file
28
arch/x86/tools/irqbalance_mck.in.in
Normal file
@ -0,0 +1,28 @@
|
||||
# irqbalance is a daemon process that distributes interrupts across
|
||||
# CPUS on SMP systems. The default is to rebalance once every 10
|
||||
# seconds. This is the environment file that is specified to systemd via the
|
||||
# EnvironmentFile key in the service unit file (or via whatever method the init
|
||||
# system you're using has.
|
||||
#
|
||||
# ONESHOT=yes
|
||||
# after starting, wait for a minute, then look at the interrupt
|
||||
# load and balance it once; after balancing exit and do not change
|
||||
# it again.
|
||||
#IRQBALANCE_ONESHOT=
|
||||
|
||||
#
|
||||
# IRQBALANCE_BANNED_CPUS
|
||||
# 64 bit bitmask which allows you to indicate which cpu's should
|
||||
# be skipped when reblancing irqs. Cpu numbers which have their
|
||||
# corresponding bits set to one in this mask will not have any
|
||||
# irq's assigned to them on rebalance
|
||||
#
|
||||
IRQBALANCE_BANNED_CPUS=%mask%
|
||||
|
||||
#
|
||||
# IRQBALANCE_ARGS
|
||||
# append any args here to the irqbalance daemon as documented in the man page
|
||||
#
|
||||
IRQBALANCE_ARGS=--banirq=%banirq%
|
||||
|
||||
|
||||
10
arch/x86/tools/irqbalance_mck.service.in
Normal file
10
arch/x86/tools/irqbalance_mck.service.in
Normal file
@ -0,0 +1,10 @@
|
||||
[Unit]
|
||||
Description=irqbalance daemon
|
||||
After=syslog.target
|
||||
|
||||
[Service]
|
||||
EnvironmentFile=@ETCDIR@/irqbalance_mck
|
||||
ExecStart=/usr/sbin/irqbalance --foreground $IRQBALANCE_ARGS
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
46
arch/x86/tools/mcreboot-builtin-x86.sh.in
Executable file
46
arch/x86/tools/mcreboot-builtin-x86.sh.in
Executable file
@ -0,0 +1,46 @@
|
||||
#!/bin/bash -x
|
||||
|
||||
# \file arch/x86/tools/mcreboot-builtin-x86.sh.in
|
||||
# License details are found in the file LICENSE.
|
||||
# \brief
|
||||
# mckernel boot script
|
||||
# \author Masamichi Takagi <masamichi.takagi@riken.jp> \par
|
||||
# Copyright (C) 2014 RIKEN AICS
|
||||
|
||||
# HISTORY:
|
||||
#
|
||||
|
||||
prefix="@prefix@"
|
||||
BINDIR="@BINDIR@"
|
||||
SBINDIR="@SBINDIR@"
|
||||
KMODDIR="@KMODDIR@"
|
||||
KERNDIR="@KERNDIR@"
|
||||
|
||||
kill -9 `pidof mcexec`
|
||||
if lsmod | grep mcctrl > /dev/null 2>&1; then
|
||||
rmmod mcctrl || exit 1
|
||||
fi
|
||||
if lsmod | grep dcfa > /dev/null 2>&1; then
|
||||
rmmod dcfa || exit 1
|
||||
fi
|
||||
if lsmod | grep ihk_builtin > /dev/null 2>&1; then
|
||||
rmmod ihk_builtin || exit 1
|
||||
fi
|
||||
if lsmod | grep ihk > /dev/null 2>&1; then
|
||||
rmmod ihk || exit 1
|
||||
fi
|
||||
insmod "$KMODDIR/ihk.ko" &&
|
||||
insmod "$KMODDIR/ihk_builtin.ko" &&
|
||||
"$SBINDIR/ihkconfig" 0 create &&
|
||||
NCORE=`dmesg | grep -E 'SHIMOS: CPU Status:'|awk '{split($0,a," "); for (i = 1; i <= length(a); i++) { if(a[i] ~ /2/) {count++}} print count;}'`
|
||||
MEM=`free -g | grep -E 'Mem:' | awk '{print int($2/4)}'`
|
||||
"$SBINDIR/ihkosctl" 0 alloc "$NCORE" "$MEM"g &&
|
||||
"$SBINDIR/ihkosctl" 0 load "$KERNDIR/mckernel.img" &&
|
||||
"$SBINDIR/ihkosctl" 0 kargs hidos osnum=0 &&
|
||||
"$SBINDIR/ihkosctl" 0 boot &&
|
||||
sleep 1 &&
|
||||
"$SBINDIR/ihkosctl" 0 kmsg &&
|
||||
insmod "$KMODDIR/mcctrl.ko" &&
|
||||
sleep 1 &&
|
||||
"$SBINDIR/ihkosctl" 0 kmsg &&
|
||||
exit 0
|
||||
475
arch/x86/tools/mcreboot-smp-x86.sh.in
Normal file
475
arch/x86/tools/mcreboot-smp-x86.sh.in
Normal file
@ -0,0 +1,475 @@
|
||||
#!/bin/bash
|
||||
|
||||
# IHK SMP-x86 example boot script.
|
||||
# author: Balazs Gerofi <bgerofi@riken.jp>
|
||||
# Copyright (C) 2014 RIKEN AICS
|
||||
#
|
||||
# This is an example script for loading IHK, configuring a partition and
|
||||
# booting McKernel on it. Unless specific CPUs and memory are requested,
|
||||
# the script reserves half of the CPU cores and 512MB of RAM from
|
||||
# NUMA node 0 when IHK is loaded for the first time.
|
||||
# Otherwise, it destroys the current McKernel instance and reboots it using
|
||||
# the same set of resources as it used previously.
|
||||
# Note that the script does not output anything unless an error occurs.
|
||||
|
||||
prefix="@prefix@"
|
||||
BINDIR="${prefix}/bin"
|
||||
SBINDIR="${prefix}/sbin"
|
||||
ETCDIR=@ETCDIR@
|
||||
KMODDIR="${prefix}/kmod"
|
||||
KERNDIR="${prefix}/@TARGET@/kernel"
|
||||
ENABLE_MCOVERLAYFS="@ENABLE_MCOVERLAYFS@"
|
||||
|
||||
mem="512M@0"
|
||||
cpus=""
|
||||
|
||||
if [ "${BASH_VERSINFO[0]}" -lt 4 ]; then
|
||||
echo "You need at least bash-4.0 to run this script." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
INTERVAL=1
|
||||
LOGMODE=0
|
||||
facility="LOG_LOCAL6"
|
||||
chown_option=`logname 2> /dev/null`
|
||||
|
||||
if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" -o "`systemctl status irqbalance.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then
|
||||
irqbalance_used="yes"
|
||||
else
|
||||
irqbalance_used="no"
|
||||
fi
|
||||
|
||||
turbo=""
|
||||
|
||||
while getopts :ti:k:c:m:o:f: OPT
|
||||
do
|
||||
case ${OPT} in
|
||||
f) facility=${OPTARG}
|
||||
;;
|
||||
o) chown_option=${OPTARG}
|
||||
;;
|
||||
i) INTERVAL=${OPTARG}
|
||||
expr "${INTERVAL}" + 1 > /dev/null 2>&1
|
||||
if [ $? -ge 2 ]
|
||||
then
|
||||
echo "invalid -i value" >&2
|
||||
exit 1
|
||||
fi
|
||||
if [ ${INTERVAL} -le 0 ]
|
||||
then
|
||||
echo "invalid -i value" >&2
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
k) LOGMODE=${OPTARG}
|
||||
expr "${LOGMODE}" + 1 > /dev/null 2>&1
|
||||
if [ $? -ge 2 ]
|
||||
then
|
||||
echo "invalid -k value" >&2
|
||||
exit 1
|
||||
fi
|
||||
if [ ${LOGMODE} -lt 0 -o ${LOGMODE} -gt 2 ]
|
||||
then
|
||||
echo "invalid -k value" >&2
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
c) cpus=${OPTARG}
|
||||
;;
|
||||
m) mem=${OPTARG}
|
||||
;;
|
||||
t) turbo="turbo"
|
||||
;;
|
||||
*) echo "invalid option -${OPT}" >&2
|
||||
exit 1
|
||||
esac
|
||||
done
|
||||
|
||||
#
|
||||
# Revert any state that has been initialized before the error occured.
|
||||
#
|
||||
error_exit() {
|
||||
local status=$1
|
||||
|
||||
case $status in
|
||||
mcos_sys_mounted)
|
||||
if [ "$enable_mcoverlay" == "yes" ]; then
|
||||
umount /tmp/mcos/mcos0_sys
|
||||
fi
|
||||
;&
|
||||
mcos_proc_mounted)
|
||||
if [ "$enable_mcoverlay" == "yes" ]; then
|
||||
umount /tmp/mcos/mcos0_proc
|
||||
fi
|
||||
;&
|
||||
mcoverlayfs_loaded)
|
||||
if [ "$enable_mcoverlay" == "yes" ]; then
|
||||
rmmod mcoverlay
|
||||
fi
|
||||
;&
|
||||
linux_proc_bind_mounted)
|
||||
if [ "$enable_mcoverlay" == "yes" ]; then
|
||||
umount /tmp/mcos/linux_proc
|
||||
fi
|
||||
;&
|
||||
tmp_mcos_mounted)
|
||||
if [ "$enable_mcoverlay" == "yes" ]; then
|
||||
umount /tmp/mcos
|
||||
fi
|
||||
;&
|
||||
tmp_mcos_created)
|
||||
if [ "$enable_mcoverlay" == "yes" ]; then
|
||||
rm -rf /tmp/mcos
|
||||
fi
|
||||
;&
|
||||
os_created)
|
||||
# Destroy all LWK instances
|
||||
if ls /dev/mcos* 1>/dev/null 2>&1; then
|
||||
for i in /dev/mcos*; do
|
||||
ind=`echo $i|cut -c10-`;
|
||||
if ! ${SBINDIR}/ihkconfig 0 destroy $ind; then
|
||||
echo "warning: failed to destroy LWK instance $ind" >&2
|
||||
fi
|
||||
done
|
||||
fi
|
||||
;&
|
||||
mcctrl_loaded)
|
||||
rmmod mcctrl || echo "warning: failed to remove mcctrl" >&2
|
||||
;&
|
||||
mem_reserved)
|
||||
mem=`${SBINDIR}/ihkconfig 0 query mem`
|
||||
if [ "${mem}" != "" ]; then
|
||||
if ! ${SBINDIR}/ihkconfig 0 release mem $mem > /dev/null; then
|
||||
echo "warning: failed to release memory" >&2
|
||||
fi
|
||||
fi
|
||||
;&
|
||||
cpus_reserved)
|
||||
cpus=`${SBINDIR}/ihkconfig 0 query cpu`
|
||||
if [ "${cpus}" != "" ]; then
|
||||
if ! ${SBINDIR}/ihkconfig 0 release cpu $cpus > /dev/null; then
|
||||
echo "warning: failed to release CPUs" >&2
|
||||
fi
|
||||
fi
|
||||
;&
|
||||
ihk_smp_loaded)
|
||||
rmmod ihk_smp_x86 || echo "warning: failed to remove ihk_smp_x86" >&2
|
||||
;&
|
||||
ihk_loaded)
|
||||
rmmod ihk || echo "warning: failed to remove ihk" >&2
|
||||
;&
|
||||
irqbalance_stopped)
|
||||
if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then
|
||||
if ! systemctl stop irqbalance_mck.service 2>/dev/null; then
|
||||
echo "warning: failed to stop irqbalance_mck" >&2
|
||||
fi
|
||||
if ! systemctl disable irqbalance_mck.service >/dev/null 2>/dev/null; then
|
||||
echo "warning: failed to disable irqbalance_mck" >&2
|
||||
fi
|
||||
if ! etcdir=@ETCDIR@ perl -e '$etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "$etcdir/proc/irq/*/smp_affinity"; foreach $file (@files) { $dest = substr($file, length($etcdir)); if(0) {print "cp $file $dest\n";} system("cp $file $dest 2>/dev/null"); }'; then
|
||||
echo "warning: failed to restore /proc/irq/*/smp_affinity" >&2
|
||||
fi
|
||||
if ! systemctl start irqbalance.service; then
|
||||
echo "warning: failed to start irqbalance" >&2;
|
||||
fi
|
||||
fi
|
||||
;&
|
||||
initial)
|
||||
# Nothing more to revert
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 1
|
||||
}
|
||||
|
||||
ihk_ikc_irq_core=0
|
||||
|
||||
release=`uname -r`
|
||||
major=`echo ${release} | sed -e 's/^\([0-9]*\).*/\1/'`
|
||||
minor=`echo ${release} | sed -e 's/^[0-9]*.\([0-9]*\).*/\1/'`
|
||||
patch=`echo ${release} | sed -e 's/^[0-9]*.[0-9]*.\([0-9]*\).*/\1/'`
|
||||
linux_version_code=`expr \( ${major} \* 65536 \) + \( ${minor} \* 256 \) + ${patch}`
|
||||
rhel_release=`echo ${release} | sed -e 's/^[0-9]*.[0-9]*.[0-9]*-\([0-9]*\).*/\1/'`
|
||||
if [ "${release}" == "${rhel_release}" ]; then
|
||||
rhel_release="";
|
||||
fi
|
||||
|
||||
enable_mcoverlay="no"
|
||||
|
||||
if [ "${ENABLE_MCOVERLAYFS}" == "yes" ]; then
|
||||
if [ "${rhel_release}" == "" ]; then
|
||||
if [ ${linux_version_code} -ge 262144 -a ${linux_version_code} -lt 262400 ]; then
|
||||
enable_mcoverlay="yes"
|
||||
fi
|
||||
if [ ${linux_version_code} -ge 263680 -a ${linux_version_code} -lt 263936 ]; then
|
||||
enable_mcoverlay="yes"
|
||||
fi
|
||||
else
|
||||
if [ ${linux_version_code} -eq 199168 -a ${rhel_release} -ge 327 ]; then
|
||||
enable_mcoverlay="yes"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Figure out CPUs if not requested by user
|
||||
if [ "$cpus" == "" ]; then
|
||||
# Get the number of CPUs on NUMA node 0
|
||||
nr_cpus=`lscpu --parse | awk -F"," '{if ($4 == 0) print $4}' | wc -l`
|
||||
|
||||
# Use the second half of the cores
|
||||
let nr_cpus="$nr_cpus / 2"
|
||||
cpus=`lscpu --parse | awk -F"," '{if ($4 == 0) print $1}' | tail -n $nr_cpus | xargs echo -n | sed 's/ /,/g'`
|
||||
if [ "$cpus" == "" ]; then
|
||||
echo "error: no available CPUs on NUMA node 0?" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Remove mcoverlay if loaded
|
||||
if [ "$enable_mcoverlay" == "yes" ]; then
|
||||
if [ "`lsmod | grep mcoverlay`" != "" ]; then
|
||||
if [ "`cat /proc/mounts | grep /tmp/mcos/mcos0_sys`" != "" ]; then umount -l /tmp/mcos/mcos0_sys; fi
|
||||
if [ "`cat /proc/mounts | grep /tmp/mcos/mcos0_proc`" != "" ]; then umount -l /tmp/mcos/mcos0_proc; fi
|
||||
if [ "`cat /proc/mounts | grep /tmp/mcos/linux_proc`" != "" ]; then umount -l /tmp/mcos/linux_proc; fi
|
||||
if [ "`cat /proc/mounts | grep /tmp/mcos`" != "" ]; then umount -l /tmp/mcos; fi
|
||||
if [ -e /tmp/mcos ]; then rm -rf /tmp/mcos; fi
|
||||
if ! rmmod mcoverlay; then
|
||||
echo "error: removing mcoverlay" >&2
|
||||
error_exit "initial"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Stop irqbalance
|
||||
if [ "${irqbalance_used}" == "yes" ]; then
|
||||
systemctl stop irqbalance_mck.service 2>/dev/null
|
||||
if ! systemctl stop irqbalance.service 2>/dev/null ; then
|
||||
echo "error: stopping irqbalance" >&2
|
||||
error_exit "initial"
|
||||
fi;
|
||||
fi
|
||||
|
||||
# Start mcklogd. Note that McKernel blocks when kmsg buffer is full
|
||||
# with '-k 1' until mcklogd unblocks it so starting mcklogd must preceed
|
||||
# booting McKernel
|
||||
if [ ${LOGMODE} -ne 0 ]; then
|
||||
# Stop mcklogd which has survived McKernel shutdown because
|
||||
# mcstop+release.sh is not used
|
||||
pkill mcklogd
|
||||
SBINDIR=${SBINDIR} ${SBINDIR}/mcklogd -i ${INTERVAL} -f ${facility}
|
||||
fi
|
||||
|
||||
# Load IHK if not loaded
|
||||
if [ "`lsmod | grep ihk`" == "" ]; then
|
||||
if ! insmod ${KMODDIR}/ihk.ko; then
|
||||
echo "error: loading ihk" >&2
|
||||
error_exit "irqbalance_stopped"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Drop Linux caches to free memory
|
||||
sync && echo 3 > /proc/sys/vm/drop_caches
|
||||
|
||||
# Merge free memory areas into large, physically contigous ones
|
||||
echo 1 > /proc/sys/vm/compact_memory 2>/dev/null
|
||||
|
||||
# Load IHK-SMP if not loaded and reserve CPUs and memory
|
||||
if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then
|
||||
ihk_irq=""
|
||||
for i in `seq 64 255`; do
|
||||
if [ ! -d /proc/irq/$i ] && [ "`cat /proc/interrupts | grep ":" | awk '{print $1}' | grep -o '[0-9]*' | grep -e '^$i$'`" == "" ]; then
|
||||
ihk_irq=$i
|
||||
break
|
||||
fi
|
||||
done
|
||||
if [ "$ihk_irq" == "" ]; then
|
||||
echo "error: no IRQ available" >&2
|
||||
error_exit "ihk_loaded"
|
||||
fi
|
||||
if ! insmod ${KMODDIR}/ihk-smp-x86.ko ihk_start_irq=$ihk_irq ihk_ikc_irq_core=$ihk_ikc_irq_core; then
|
||||
echo "error: loading ihk-smp-x86" >&2
|
||||
error_exit "ihk_loaded"
|
||||
fi
|
||||
if ! ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then
|
||||
echo "error: reserving CPUs" >&2;
|
||||
error_exit "ihk_smp_loaded"
|
||||
fi
|
||||
if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then
|
||||
echo "error: reserving memory" >&2
|
||||
error_exit "cpus_reserved"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Load mcctrl if not loaded
|
||||
if [ "`lsmod | grep mcctrl`" == "" ]; then
|
||||
if ! insmod ${KMODDIR}/mcctrl.ko; then
|
||||
echo "error: inserting mcctrl.ko" >&2
|
||||
error_exit "mem_reserved"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Destroy all LWK instances
|
||||
if ls /dev/mcos* 1>/dev/null 2>&1; then
|
||||
for i in /dev/mcos*; do
|
||||
ind=`echo $i|cut -c10-`;
|
||||
if ! ${SBINDIR}/ihkconfig 0 destroy $ind; then
|
||||
echo "error: destroying LWK instance $ind failed" >&2
|
||||
error_exit "mcctrl_loaded"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
# Create OS instance
|
||||
if ! ${SBINDIR}/ihkconfig 0 create; then
|
||||
echo "error: creating OS instance" >&2
|
||||
error_exit "mcctrl_loaded"
|
||||
fi
|
||||
|
||||
# Assign CPUs
|
||||
if ! ${SBINDIR}/ihkosctl 0 assign cpu ${cpus}; then
|
||||
echo "error: assign CPUs" >&2
|
||||
error_exit "os_created"
|
||||
fi
|
||||
|
||||
# Assign memory
|
||||
if ! ${SBINDIR}/ihkosctl 0 assign mem ${mem}; then
|
||||
echo "error: assign memory" >&2
|
||||
error_exit "os_created"
|
||||
fi
|
||||
|
||||
# Load kernel image
|
||||
if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then
|
||||
echo "error: loading kernel image: ${KERNDIR}/mckernel.img" >&2
|
||||
error_exit "os_created"
|
||||
fi
|
||||
|
||||
# Set kernel arguments
|
||||
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos ksyslogd=${LOGMODE} $turbo"; then
|
||||
echo "error: setting kernel arguments" >&2
|
||||
error_exit "os_created"
|
||||
fi
|
||||
|
||||
# Boot OS instance
|
||||
if ! ${SBINDIR}/ihkosctl 0 boot; then
|
||||
echo "error: booting" >&2
|
||||
error_exit "os_created"
|
||||
fi
|
||||
|
||||
# Set device file ownership
|
||||
if ! chown ${chown_option} /dev/mcd* /dev/mcos*; then
|
||||
echo "warning: failed to chown device files" >&2
|
||||
fi
|
||||
|
||||
# Overlay /proc, /sys with McKernel specific contents
|
||||
if [ "$enable_mcoverlay" == "yes" ]; then
|
||||
if [ ! -e /tmp/mcos ]; then mkdir -p /tmp/mcos; fi
|
||||
if ! mount -t tmpfs tmpfs /tmp/mcos; then
|
||||
echo "error: mount /tmp/mcos" >&2
|
||||
error_exit "tmp_mcos_created"
|
||||
fi
|
||||
if [ ! -e /tmp/mcos/linux_proc ]; then mkdir -p /tmp/mcos/linux_proc; fi
|
||||
if ! mount --bind /proc /tmp/mcos/linux_proc; then
|
||||
echo "error: mount /tmp/mcos/linux_proc" >&2
|
||||
error_exit "tmp_mcos_mounted"
|
||||
fi
|
||||
if ! insmod ${KMODDIR}/mcoverlay.ko; then
|
||||
echo "error: inserting mcoverlay.ko" >&2
|
||||
error_exit "linux_proc_bind_mounted"
|
||||
fi
|
||||
while [ ! -e /proc/mcos0 ]
|
||||
do
|
||||
sleep 0.1
|
||||
done
|
||||
if [ ! -e /tmp/mcos/mcos0_proc ]; then mkdir -p /tmp/mcos/mcos0_proc; fi
|
||||
if [ ! -e /tmp/mcos/mcos0_proc_upper ]; then mkdir -p /tmp/mcos/mcos0_proc_upper; fi
|
||||
if [ ! -e /tmp/mcos/mcos0_proc_work ]; then mkdir -p /tmp/mcos/mcos0_proc_work; fi
|
||||
if ! mount -t mcoverlay mcoverlay -o lowerdir=/proc/mcos0:/proc,upperdir=/tmp/mcos/mcos0_proc_upper,workdir=/tmp/mcos/mcos0_proc_work,nocopyupw,nofscheck /tmp/mcos/mcos0_proc; then
|
||||
echo "error: mounting /tmp/mcos/mcos0_proc" >&2
|
||||
error_exit "mcoverlayfs_loaded"
|
||||
fi
|
||||
# TODO: How de we revert this in case of failure??
|
||||
mount --make-rprivate /proc
|
||||
|
||||
while [ ! -e /sys/devices/virtual/mcos/mcos0/sys/setup_complete ]
|
||||
do
|
||||
sleep 0.1
|
||||
done
|
||||
if [ ! -e /tmp/mcos/mcos0_sys ]; then mkdir -p /tmp/mcos/mcos0_sys; fi
|
||||
if [ ! -e /tmp/mcos/mcos0_sys_upper ]; then mkdir -p /tmp/mcos/mcos0_sys_upper; fi
|
||||
if [ ! -e /tmp/mcos/mcos0_sys_work ]; then mkdir -p /tmp/mcos/mcos0_sys_work; fi
|
||||
if ! mount -t mcoverlay mcoverlay -o lowerdir=/sys/devices/virtual/mcos/mcos0/sys:/sys,upperdir=/tmp/mcos/mcos0_sys_upper,workdir=/tmp/mcos/mcos0_sys_work,nocopyupw,nofscheck /tmp/mcos/mcos0_sys; then
|
||||
echo "error: mount /tmp/mcos/mcos0_sys" >&2
|
||||
error_exit "mcos_proc_mounted"
|
||||
fi
|
||||
# TODO: How de we revert this in case of failure??
|
||||
mount --make-rprivate /sys
|
||||
|
||||
rm -rf /tmp/mcos/mcos0_sys/setup_complete
|
||||
|
||||
# Hide NUMA related files which are outside the LWK partition
|
||||
for cpuid in `find /sys/devices/system/cpu/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do
|
||||
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/cpu/$cpuid" ]; then
|
||||
rm -rf /tmp/mcos/mcos0_sys/devices/system/cpu/$cpuid
|
||||
rm -rf /tmp/mcos/mcos0_sys/bus/cpu/devices/$cpuid
|
||||
rm -rf /tmp/mcos/mcos0_sys/bus/cpu/drivers/processor/$cpuid
|
||||
else
|
||||
for nodeid in `find /sys/devices/system/cpu/$cpuid/* -maxdepth 0 -name "node[0123456789]*" -printf "%f "`; do
|
||||
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/cpu/$cpuid/$nodeid" ]; then
|
||||
rm -f /tmp/mcos/mcos0_sys/devices/system/cpu/$cpuid/$nodeid
|
||||
fi
|
||||
done
|
||||
fi
|
||||
done
|
||||
for nodeid in `find /sys/devices/system/node/* -maxdepth 0 -name "node[0123456789]*" -printf "%f "`; do
|
||||
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/node/$nodeid" ]; then
|
||||
rm -rf /tmp/mcos/mcos0_sys/devices/system/node/$nodeid/*
|
||||
rm -rf /tmp/mcos/mcos0_sys/bus/node/devices/$nodeid
|
||||
else
|
||||
# Delete non-existent symlinks
|
||||
for cpuid in `find /sys/devices/system/node/$nodeid/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do
|
||||
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/node/$nodeid/$cpuid" ]; then
|
||||
rm -f /tmp/mcos/mcos0_sys/devices/system/node/$nodeid/$cpuid
|
||||
fi
|
||||
done
|
||||
|
||||
rm -f /tmp/mcos/mcos0_sys/devices/system/node/$nodeid/memory*
|
||||
fi
|
||||
done
|
||||
rm -f /tmp/mcos/mcos0_sys/devices/system/node/has_*
|
||||
for cpuid in `find /sys/bus/cpu/devices/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do
|
||||
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/bus/cpu/devices/$cpuid" ]; then
|
||||
rm -rf /tmp/mcos/mcos0_sys/bus/cpu/devices/$cpuid
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
# Start irqbalance with CPUs and IRQ for McKernel banned
|
||||
if [ "${irqbalance_used}" == "yes" ]; then
|
||||
if ! etcdir=@ETCDIR@ perl -e 'use File::Copy qw(copy); $etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "/proc/irq/*/smp_affinity"; foreach $file (@files) { $rel = substr($file, 1); $dir=substr($rel, 0, length($rel)-length("/smp_affinity")); if(0) { print "cp $file $etcdir/$rel\n";} if(system("mkdir -p $etcdir/$dir")){ exit 1;} if(!copy($file,"$etcdir/$rel")){ exit 1;} }'; then
|
||||
echo "error: saving /proc/irq/*/smp_affinity" >&2
|
||||
error_exit "mcos_sys_mounted"
|
||||
fi;
|
||||
|
||||
ncpus=`lscpu | grep -E '^CPU\(s\):' | awk '{print $2}'`
|
||||
smp_affinity_mask=`echo $cpus | ncpus=$ncpus perl -e 'while(<>){@tokens = split /,/;foreach $token (@tokens) {@nums = split /-/,$token; for($num = $nums[0]; $num <= $nums[$#nums]; $num++) {$ndx=int($num/32); $mask[$ndx] |= (1<<($num % 32))}}} $nint32s = int(($ENV{'ncpus'}+31)/32); for($j = $nint32s - 1; $j >= 0; $j--) { if($j != $nint32s - 1){print ",";} $nblks = $j == $nint32s - 1 ? int(($ENV{'ncpus'} % 32)/4) : 8; for($i = $nblks - 1;$i >= 0;$i--){ printf("%01x",($mask[$j] >> ($i*4)) & 0xf);}}'`
|
||||
|
||||
if ! ncpus=$ncpus smp_affinity_mask=$smp_affinity_mask perl -e '@dirs = grep { -d } glob "/proc/irq/*"; foreach $dir (@dirs) { $hit = 0; $affinity_str = `cat $dir/smp_affinity`; chomp $affinity_str; @int32strs = split /,/, $affinity_str; @int32strs_mask=split /,/, $ENV{'smp_affinity_mask'}; for($i=0;$i <= $#int32strs_mask; $i++) { $int32strs_inv[$i] = sprintf("%08x",hex($int32strs_mask[$i])^0xffffffff); if($i == 0) { $len = int((($ENV{'ncpus'}%32)+3)/4); $int32strs_inv[$i] = substr($int32strs_inv[$i], -$len, $len); } } $inv = join(",", @int32strs_inv); $nint32s = int(($ENV{'ncpus'}+31)/32); for($j = $nint32s - 1; $j >= 0; $j--) { if(hex($int32strs[$nint32s - 1 - $j]) & hex($int32strs_mask[$nint32s - 1 - $j])) { $hit = 1; }} if($hit == 1) { $cmd = "echo $inv > $dir/smp_affinity 2>/dev/null"; system $cmd;}}'; then
|
||||
echo "error: modifying /proc/irq/*/smp_affinity" >&2
|
||||
error_exit "mcos_sys_mounted"
|
||||
fi
|
||||
|
||||
banirq=`cat /proc/interrupts| perl -e 'while(<>) { if(/^\s*(\d+).*IHK\-SMP\s*$/) {print $1;}}'`
|
||||
|
||||
sed "s/%mask%/$smp_affinity_mask/g" $ETCDIR/irqbalance_mck.in | sed "s/%banirq%/$banirq/g" > $ETCDIR/irqbalance_mck
|
||||
if ! systemctl link $ETCDIR/irqbalance_mck.service >/dev/null 2>/dev/null; then
|
||||
echo "error: linking irqbalance_mck" >&2
|
||||
error_exit "mcos_sys_mounted"
|
||||
fi
|
||||
|
||||
if ! systemctl start irqbalance_mck.service 2>/dev/null ; then
|
||||
echo "error: starting irqbalance_mck" >&2
|
||||
error_exit "mcos_sys_mounted"
|
||||
fi
|
||||
# echo cpus=$cpus mask=$smp_affinity_mask banirq=$banirq
|
||||
fi
|
||||
|
||||
16
arch/x86/tools/mcshutdown-builtin-x86.sh.in
Normal file
16
arch/x86/tools/mcshutdown-builtin-x86.sh.in
Normal file
@ -0,0 +1,16 @@
|
||||
#!/bin/bash
|
||||
# \file arch/x86/tools/mcshutdown-attached-mic.sh.in
|
||||
# License details are found in the file LICENSE.
|
||||
# \brief
|
||||
# mckernel shutdown script
|
||||
#
|
||||
# \author McKernel Development Team
|
||||
#
|
||||
|
||||
prefix="@prefix@"
|
||||
BINDIR="@BINDIR@"
|
||||
SBINDIR="@SBINDIR@"
|
||||
KMODDIR="@KMODDIR@"
|
||||
KERNDIR="@KERNDIR@"
|
||||
|
||||
"$SBINDIR/ihkosctl" 0 shutdown
|
||||
115
arch/x86/tools/mcstop+release-smp-x86.sh.in
Normal file
115
arch/x86/tools/mcstop+release-smp-x86.sh.in
Normal file
@ -0,0 +1,115 @@
|
||||
#!/bin/bash
|
||||
|
||||
# IHK SMP-x86 example McKernel unload script.
|
||||
# author: Balazs Gerofi <bgerofi@riken.jp>
|
||||
# Copyright (C) 2015 RIKEN AICS
|
||||
#
|
||||
# This is an example script for destroying McKernel and releasing IHK resources
|
||||
# Note that the script does no output anything unless an error occurs.
|
||||
|
||||
prefix="@prefix@"
|
||||
BINDIR="@BINDIR@"
|
||||
SBINDIR="@SBINDIR@"
|
||||
ETCDIR=@ETCDIR@
|
||||
KMODDIR="@KMODDIR@"
|
||||
KERNDIR="@KERNDIR@"
|
||||
|
||||
mem=""
|
||||
cpus=""
|
||||
|
||||
# No SMP module? Exit.
|
||||
if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then exit 0; fi
|
||||
|
||||
# Destroy all LWK instances
|
||||
if ls /dev/mcos* 1>/dev/null 2>&1; then
|
||||
for i in /dev/mcos*; do
|
||||
ind=`echo $i|cut -c10-`;
|
||||
if ! ${SBINDIR}/ihkconfig 0 destroy $ind; then
|
||||
echo "error: destroying LWK instance $ind failed" >&2
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
# Query IHK-SMP resources and release them
|
||||
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then
|
||||
echo "error: querying cpus" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
cpus=`${SBINDIR}/ihkconfig 0 query cpu`
|
||||
if [ "${cpus}" != "" ]; then
|
||||
if ! ${SBINDIR}/ihkconfig 0 release cpu $cpus > /dev/null; then
|
||||
echo "error: releasing CPUs" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if ! ${SBINDIR}/ihkconfig 0 query mem > /dev/null; then
|
||||
echo "error: querying memory" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mem=`${SBINDIR}/ihkconfig 0 query mem`
|
||||
if [ "${mem}" != "" ]; then
|
||||
if ! ${SBINDIR}/ihkconfig 0 release mem $mem > /dev/null; then
|
||||
echo "error: releasing memory" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Remove delegator if loaded
|
||||
if [ "`lsmod | grep mcctrl`" != "" ]; then
|
||||
if ! rmmod mcctrl; then
|
||||
echo "error: removing mcctrl" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Remove mcoverlay if loaded
|
||||
if [ "`lsmod | grep mcoverlay`" != "" ]; then
|
||||
if [ "`cat /proc/mounts | grep /tmp/mcos/mcos0_sys`" != "" ]; then umount -l /tmp/mcos/mcos0_sys; fi
|
||||
if [ "`cat /proc/mounts | grep /tmp/mcos/mcos0_proc`" != "" ]; then umount -l /tmp/mcos/mcos0_proc; fi
|
||||
if [ "`cat /proc/mounts | grep /tmp/mcos/linux_proc`" != "" ]; then umount -l /tmp/mcos/linux_proc; fi
|
||||
if [ "`cat /proc/mounts | grep /tmp/mcos`" != "" ]; then umount -l /tmp/mcos; fi
|
||||
if [ -e /tmp/mcos ]; then rm -rf /tmp/mcos; fi
|
||||
if ! rmmod mcoverlay; then
|
||||
echo "warning: failed to remove mcoverlay" >&2
|
||||
fi
|
||||
fi
|
||||
|
||||
# Remove SMP module
|
||||
if [ "`lsmod | grep ihk_smp_x86`" != "" ]; then
|
||||
if ! rmmod ihk_smp_x86; then
|
||||
echo "error: removing ihk_smp_x86" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Remove core module
|
||||
if [ "`lsmod | grep -E 'ihk\s' | awk '{print $1}'`" != "" ]; then
|
||||
if ! rmmod ihk; then
|
||||
echo "error: removing ihk" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Stop mcklogd
|
||||
pkill mcklogd
|
||||
|
||||
# Start irqbalance with the original settings
|
||||
if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then
|
||||
if ! systemctl stop irqbalance_mck.service 2>/dev/null; then
|
||||
echo "warning: failed to stop irqbalance_mck" >&2
|
||||
fi
|
||||
if ! systemctl disable irqbalance_mck.service >/dev/null 2>/dev/null; then
|
||||
echo "warning: failed to disable irqbalance_mck" >&2
|
||||
fi
|
||||
if ! etcdir=@ETCDIR@ perl -e '$etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "$etcdir/proc/irq/*/smp_affinity"; foreach $file (@files) { $dest = substr($file, length($etcdir)); if(0) {print "cp $file $dest\n";} system("cp $file $dest 2>/dev/null"); }'; then
|
||||
echo "warning: failed to restore /proc/irq/*/smp_affinity" >&2
|
||||
fi
|
||||
if ! systemctl start irqbalance.service; then
|
||||
echo "warning: failed to start irqbalance" >&2;
|
||||
fi
|
||||
fi
|
||||
|
||||
197
configure.ac
197
configure.ac
@ -24,13 +24,30 @@ AC_ARG_WITH([kernelsrc],
|
||||
|
||||
AC_ARG_WITH([target],
|
||||
AC_HELP_STRING(
|
||||
[--with-target={attached-mic | builtin-mic | builtin-x86}],[target, default is attached-mic]),
|
||||
[--with-target={attached-mic | builtin-mic | builtin-x86 | smp-x86}],[target, default is attached-mic]),
|
||||
[WITH_TARGET=$withval],[WITH_TARGET=yes])
|
||||
|
||||
AC_ARG_WITH([system_map],
|
||||
AS_HELP_STRING(
|
||||
[--with-system_map=path],[Path to 'System.map file', default is /boot/System.map-uname_r]),
|
||||
[WITH_SYSTEM_MAP=$withval],[WITH_SYSTEM_MAP=yes])
|
||||
|
||||
AC_ARG_ENABLE([dcfa],
|
||||
[AS_HELP_STRING(
|
||||
[--enable-dcfa],[Enable DCFA modules])],[],[enable_dcfa=no])
|
||||
|
||||
AC_ARG_ENABLE([memdump],
|
||||
AC_HELP_STRING([--enable-memdump],
|
||||
[enable dumping memory and analyzing a dump]),
|
||||
[ENABLE_MEMDUMP=$enableval],
|
||||
[ENABLE_MEMDUMP=default])
|
||||
|
||||
AC_ARG_ENABLE([mcoverlayfs],
|
||||
AC_HELP_STRING([--enable-mcoverlayfs],
|
||||
[enable mcoverlayfs implementation]),
|
||||
[ENABLE_MCOVERLAYFS=$enableval],
|
||||
[ENABLE_MCOVERLAYFS=yes])
|
||||
|
||||
case "X$WITH_KERNELSRC" in
|
||||
Xyes | Xno | X)
|
||||
WITH_KERNELSRC='/lib/modules/`uname -r`/build'
|
||||
@ -49,9 +66,27 @@ fi
|
||||
test "x$prefix" = xNONE && prefix="$ac_default_prefix"
|
||||
|
||||
case $WITH_TARGET in
|
||||
attached-mic)
|
||||
attached-mic|builtin-x86|smp-x86)
|
||||
ARCH=`uname -m`
|
||||
AC_PROG_CC
|
||||
XCC=$CC
|
||||
CFLAGS="$CFLAGS -ffreestanding -fno-tree-loop-distribute-patterns"
|
||||
;;
|
||||
builtin-mic)
|
||||
ARCH=k1om
|
||||
AC_CHECK_PROG(XCC,
|
||||
[x86_64-$ARCH-linux-gcc],
|
||||
[x86_64-$ARCH-linux-gcc],
|
||||
[no])
|
||||
CC=$XCC
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([target $WITH_TARGET is unknwon])
|
||||
;;
|
||||
esac
|
||||
|
||||
case $WITH_TARGET in
|
||||
attached-mic)
|
||||
if test "X$KERNDIR" = X; then
|
||||
KERNDIR="$prefix/attached/kernel"
|
||||
fi
|
||||
@ -69,12 +104,6 @@ case $WITH_TARGET in
|
||||
fi
|
||||
;;
|
||||
builtin-mic)
|
||||
ARCH=k1om
|
||||
AC_CHECK_PROG(XCC,
|
||||
[x86_64-$ARCH-linux-gcc],
|
||||
[x86_64-$ARCH-linux-gcc],
|
||||
[no])
|
||||
CC=$XCC
|
||||
if test "X$KERNDIR" = X; then
|
||||
KERNDIR="$prefix/attached/kernel"
|
||||
fi
|
||||
@ -92,9 +121,6 @@ case $WITH_TARGET in
|
||||
fi
|
||||
;;
|
||||
builtin-x86)
|
||||
ARCH=`uname -m`
|
||||
AC_PROG_CC
|
||||
XCC=$CC
|
||||
if test "X$KERNDIR" = X; then
|
||||
KERNDIR="$prefix/attached/kernel"
|
||||
fi
|
||||
@ -111,6 +137,26 @@ case $WITH_TARGET in
|
||||
MANDIR="$prefix/attached/man"
|
||||
fi
|
||||
;;
|
||||
smp-x86)
|
||||
if test "X$KERNDIR" = X; then
|
||||
KERNDIR="$prefix/smp-x86/kernel"
|
||||
fi
|
||||
if test "X$BINDIR" = X; then
|
||||
BINDIR="$prefix/bin"
|
||||
fi
|
||||
if test "X$SBINDIR" = X; then
|
||||
SBINDIR="$prefix/sbin"
|
||||
fi
|
||||
if test "X$ETCDIR" = X; then
|
||||
ETCDIR="$prefix/etc"
|
||||
fi
|
||||
if test "X$KMODDIR" = X; then
|
||||
KMODDIR="$prefix/kmod"
|
||||
fi
|
||||
if test "X$MANDIR" = X; then
|
||||
MANDIR="$prefix/smp-x86/man"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([target $WITH_TARGET is unknwon])
|
||||
;;
|
||||
@ -119,6 +165,117 @@ esac
|
||||
KDIR="$WITH_KERNELSRC"
|
||||
TARGET="$WITH_TARGET"
|
||||
|
||||
MCCTRL_LINUX_SYMTAB=""
|
||||
case "X$WITH_SYSTEM_MAP" in
|
||||
Xyes | Xno | X)
|
||||
MCCTRL_LINUX_SYMTAB=""
|
||||
;;
|
||||
*)
|
||||
MCCTRL_LINUX_SYMTAB="$WITH_SYSTEM_MAP"
|
||||
;;
|
||||
esac
|
||||
|
||||
AC_MSG_CHECKING([[for System.map]])
|
||||
if test -f "$MCCTRL_LINUX_SYMTAB"; then
|
||||
MCCTRL_LINUX_SYMTAB="$MCCTRL_LINUX_SYMTAB"
|
||||
elif test -f "/boot/System.map-`uname -r`"; then
|
||||
MCCTRL_LINUX_SYMTAB="/boot/System.map-`uname -r`"
|
||||
elif test -f "$KDIR/System.map"; then
|
||||
MCCTRL_LINUX_SYMTAB="$KDIR/System.map"
|
||||
fi
|
||||
|
||||
if test "$MCCTRL_LINUX_SYMTAB" == ""; then
|
||||
AC_MSG_ERROR([could not find])
|
||||
fi
|
||||
|
||||
if test -z "`eval cat $MCCTRL_LINUX_SYMTAB`"; then
|
||||
AC_MSG_ERROR([could not read System.map file, no read permission?])
|
||||
fi
|
||||
AC_MSG_RESULT([$MCCTRL_LINUX_SYMTAB])
|
||||
|
||||
MCCTRL_LINUX_SYMTAB_CMD="cat $MCCTRL_LINUX_SYMTAB"
|
||||
|
||||
# MCCTRL_FIND_KSYM(SYMBOL)
|
||||
# ------------------------------------------------------
|
||||
# Search System.map for address of the given symbol and
|
||||
# do one of three things in config.h:
|
||||
# If not found, leave MCCTRL_KSYM_foo undefined
|
||||
# If found to be exported, "#define MCCTRL_KSYM_foo 0"
|
||||
# If found not to be exported, "#define MCCTRL_KSYM_foo 0x<value>"
|
||||
AC_DEFUN([MCCTRL_FIND_KSYM],[
|
||||
AC_MSG_CHECKING([[System.map for symbol $1]])
|
||||
mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " $1\$" | cut -d\ -f1`
|
||||
if test -z $mcctrl_addr; then
|
||||
AC_MSG_RESULT([not found])
|
||||
else
|
||||
mcctrl_result=$mcctrl_addr
|
||||
mcctrl_addr="0x$mcctrl_addr"
|
||||
m4_ifval([$2],[],[
|
||||
if `eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_$1\$" >/dev/null`; then
|
||||
mcctrl_result="exported"
|
||||
mcctrl_addr="0"
|
||||
fi
|
||||
])
|
||||
AC_MSG_RESULT([$mcctrl_result])
|
||||
AC_DEFINE_UNQUOTED(MCCTRL_KSYM_[]$1,$mcctrl_addr,[Define to address of kernel symbol $1, or 0 if exported])
|
||||
fi
|
||||
])
|
||||
|
||||
MCCTRL_FIND_KSYM([sys_mount])
|
||||
MCCTRL_FIND_KSYM([sys_umount])
|
||||
MCCTRL_FIND_KSYM([sys_unshare])
|
||||
MCCTRL_FIND_KSYM([zap_page_range])
|
||||
MCCTRL_FIND_KSYM([vdso_image_64])
|
||||
MCCTRL_FIND_KSYM([vdso_start])
|
||||
MCCTRL_FIND_KSYM([vdso_end])
|
||||
MCCTRL_FIND_KSYM([vdso_pages])
|
||||
MCCTRL_FIND_KSYM([__vvar_page])
|
||||
MCCTRL_FIND_KSYM([hpet_address])
|
||||
MCCTRL_FIND_KSYM([hv_clock])
|
||||
MCCTRL_FIND_KSYM([sys_readlink])
|
||||
|
||||
case $ENABLE_MEMDUMP in
|
||||
yes|no|auto)
|
||||
;;
|
||||
default)
|
||||
if test "x$WITH_TARGET" = "xsmp-x86" ; then
|
||||
ENABLE_MEMDUMP=auto
|
||||
else
|
||||
ENABLE_MEMDUMP=no
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([unknown memdump argument: $ENABLE_MEMDUMP])
|
||||
;;
|
||||
esac
|
||||
|
||||
if test "x$ENABLE_MEMDUMP" != "xno" ; then
|
||||
enableval=yes
|
||||
AC_CHECK_LIB([bfd],[bfd_init],[],[enableval=no])
|
||||
AC_CHECK_HEADER([bfd.h],[],[enableval=no])
|
||||
|
||||
if test "x$ENABLE_MEMDUMP" = "xyes" -a "x$enableval" = "xno" ; then
|
||||
AC_MSG_ERROR([memdump feature needs bfd.h and libbfd a.k.a bunutils-devel])
|
||||
fi
|
||||
ENABLE_MEMDUMP=$enableval
|
||||
fi
|
||||
|
||||
if test "x$ENABLE_MEMDUMP" = "xyes" ; then
|
||||
AC_MSG_NOTICE([memdump feature is enabled])
|
||||
AC_DEFINE([ENABLE_MEMDUMP],[1],[whether memdump feature is enabled])
|
||||
uncomment_if_ENABLE_MEMDUMP=''
|
||||
else
|
||||
AC_MSG_NOTICE([memdump feature is disabled])
|
||||
uncomment_if_ENABLE_MEMDUMP='#'
|
||||
fi
|
||||
|
||||
if test "x$ENABLE_MCOVERLAYFS" = "xyes" ; then
|
||||
AC_DEFINE([ENABLE_MCOVERLAYFS],[1],[whether mcoverlayfs is enabled])
|
||||
AC_MSG_NOTICE([mcoverlayfs is enabled])
|
||||
else
|
||||
AC_MSG_NOTICE([mcoverlayfs is disabled])
|
||||
fi
|
||||
|
||||
AC_SUBST(CC)
|
||||
AC_SUBST(XCC)
|
||||
AC_SUBST(ARCH)
|
||||
@ -126,9 +283,12 @@ AC_SUBST(KDIR)
|
||||
AC_SUBST(TARGET)
|
||||
AC_SUBST(BINDIR)
|
||||
AC_SUBST(SBINDIR)
|
||||
AC_SUBST(ETCDIR)
|
||||
AC_SUBST(KMODDIR)
|
||||
AC_SUBST(KERNDIR)
|
||||
AC_SUBST(MANDIR)
|
||||
AC_SUBST(CFLAGS)
|
||||
AC_SUBST(ENABLE_MCOVERLAYFS)
|
||||
|
||||
AC_SUBST(IHK_VERSION)
|
||||
AC_SUBST(MCKERNEL_VERSION)
|
||||
@ -136,16 +296,29 @@ AC_SUBST(DCFA_VERSION)
|
||||
AC_SUBST(IHK_RELEASE_DATE)
|
||||
AC_SUBST(MCKERNEL_RELEASE_DATE)
|
||||
AC_SUBST(DCFA_RESEASE_DATE)
|
||||
AC_SUBST(uncomment_if_ENABLE_MEMDUMP)
|
||||
|
||||
AC_CONFIG_HEADERS([executer/config.h])
|
||||
AC_CONFIG_FILES([
|
||||
Makefile
|
||||
executer/user/Makefile
|
||||
executer/kernel/Makefile
|
||||
executer/kernel/mcctrl/Makefile
|
||||
executer/kernel/mcctrl/arch/x86_64/Makefile
|
||||
executer/kernel/mcoverlayfs/Makefile
|
||||
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile
|
||||
executer/kernel/mcoverlayfs/linux-4.0.9/Makefile
|
||||
executer/kernel/mcoverlayfs/linux-4.6.7/Makefile
|
||||
kernel/Makefile
|
||||
kernel/Makefile.build
|
||||
arch/x86/tools/mcreboot-attached-mic.sh
|
||||
arch/x86/tools/mcshutdown-attached-mic.sh
|
||||
arch/x86/tools/mcreboot-builtin-x86.sh
|
||||
arch/x86/tools/mcreboot-smp-x86.sh
|
||||
arch/x86/tools/mcstop+release-smp-x86.sh
|
||||
arch/x86/tools/mcshutdown-builtin-x86.sh
|
||||
arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in
|
||||
arch/x86/tools/irqbalance_mck.service
|
||||
arch/x86/tools/irqbalance_mck.in
|
||||
])
|
||||
|
||||
AS_IF([test "x$enable_dcfa" = xyes], [
|
||||
|
||||
94
executer/config.h.in
Normal file
94
executer/config.h.in
Normal file
@ -0,0 +1,94 @@
|
||||
/* executer/config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* whether mcoverlayfs is enabled */
|
||||
#undef ENABLE_MCOVERLAYFS
|
||||
|
||||
/* whether memdump feature is enabled */
|
||||
#undef ENABLE_MEMDUMP
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#undef HAVE_INTTYPES_H
|
||||
|
||||
/* Define to 1 if you have the `bfd' library (-lbfd). */
|
||||
#undef HAVE_LIBBFD
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#undef HAVE_MEMORY_H
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#undef HAVE_STDINT_H
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#undef HAVE_STDLIB_H
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#undef HAVE_STRINGS_H
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#undef HAVE_STRING_H
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#undef HAVE_SYS_STAT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#undef HAVE_SYS_TYPES_H
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#undef HAVE_UNISTD_H
|
||||
|
||||
/* Define to address of kernel symbol __vvar_page, or 0 if exported */
|
||||
#undef MCCTRL_KSYM___vvar_page
|
||||
|
||||
/* Define to address of kernel symbol hpet_address, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_hpet_address
|
||||
|
||||
/* Define to address of kernel symbol hv_clock, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_hv_clock
|
||||
|
||||
/* Define to address of kernel symbol sys_mount, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_sys_mount
|
||||
|
||||
/* Define to address of kernel symbol sys_readlink, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_sys_readlink
|
||||
|
||||
/* Define to address of kernel symbol sys_umount, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_sys_umount
|
||||
|
||||
/* Define to address of kernel symbol sys_unshare, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_sys_unshare
|
||||
|
||||
/* Define to address of kernel symbol vdso_end, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_vdso_end
|
||||
|
||||
/* Define to address of kernel symbol vdso_image_64, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_vdso_image_64
|
||||
|
||||
/* Define to address of kernel symbol vdso_pages, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_vdso_pages
|
||||
|
||||
/* Define to address of kernel symbol vdso_start, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_vdso_start
|
||||
|
||||
/* Define to address of kernel symbol zap_page_range, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_zap_page_range
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#undef PACKAGE_BUGREPORT
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#undef PACKAGE_NAME
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#undef PACKAGE_STRING
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#undef PACKAGE_TARNAME
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#undef PACKAGE_URL
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#undef PACKAGE_VERSION
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#undef STDC_HEADERS
|
||||
@ -38,6 +38,10 @@
|
||||
#define MCEXEC_UP_SEND_SIGNAL 0x30a02906
|
||||
#define MCEXEC_UP_GET_CPU 0x30a02907
|
||||
#define MCEXEC_UP_STRNCPY_FROM_USER 0x30a02908
|
||||
#define MCEXEC_UP_NEW_PROCESS 0x30a02909
|
||||
#define MCEXEC_UP_GET_CRED 0x30a0290a
|
||||
#define MCEXEC_UP_GET_CREDV 0x30a0290b
|
||||
#define MCEXEC_UP_GET_NODES 0x30a0290c
|
||||
|
||||
#define MCEXEC_UP_PREPARE_DMA 0x30a02910
|
||||
#define MCEXEC_UP_FREE_DMA 0x30a02911
|
||||
@ -45,6 +49,12 @@
|
||||
#define MCEXEC_UP_OPEN_EXEC 0x30a02912
|
||||
#define MCEXEC_UP_CLOSE_EXEC 0x30a02913
|
||||
|
||||
#define MCEXEC_UP_SYS_MOUNT 0x30a02914
|
||||
#define MCEXEC_UP_SYS_UMOUNT 0x30a02915
|
||||
#define MCEXEC_UP_SYS_UNSHARE 0x30a02916
|
||||
|
||||
#define MCEXEC_UP_DEBUG_LOG 0x40000000
|
||||
|
||||
#define MCEXEC_UP_TRANSFER_TO_REMOTE 0
|
||||
#define MCEXEC_UP_TRANSFER_FROM_REMOTE 1
|
||||
|
||||
@ -67,6 +77,7 @@ struct program_image_section {
|
||||
};
|
||||
|
||||
#define SHELL_PATH_MAX_LEN 1024
|
||||
#define MCK_RLIM_MAX 20
|
||||
|
||||
struct program_load_desc {
|
||||
int num_sections;
|
||||
@ -76,6 +87,10 @@ struct program_load_desc {
|
||||
int err;
|
||||
int stack_prot;
|
||||
int pgid;
|
||||
int cred[8];
|
||||
int reloc;
|
||||
char enable_vdso;
|
||||
char padding[7];
|
||||
unsigned long entry;
|
||||
unsigned long user_start;
|
||||
unsigned long user_end;
|
||||
@ -90,14 +105,20 @@ struct program_load_desc {
|
||||
unsigned long args_len;
|
||||
char *envs;
|
||||
unsigned long envs_len;
|
||||
unsigned long rlimit_stack_cur;
|
||||
unsigned long rlimit_stack_max;
|
||||
struct rlimit rlimit[MCK_RLIM_MAX];
|
||||
unsigned long interp_align;
|
||||
char shell_path[SHELL_PATH_MAX_LEN];
|
||||
struct program_image_section sections[0];
|
||||
};
|
||||
|
||||
struct syscall_request {
|
||||
/* TID of requesting thread */
|
||||
int rtid;
|
||||
/*
|
||||
* TID of target thread. Remote page fault response needs to designate the
|
||||
* thread that must serve the request, 0 indicates any thread from the pool
|
||||
*/
|
||||
int ttid;
|
||||
unsigned long valid;
|
||||
unsigned long number;
|
||||
unsigned long args[6];
|
||||
@ -116,8 +137,17 @@ struct syscall_load_desc {
|
||||
unsigned long size;
|
||||
};
|
||||
|
||||
#define IHK_SCD_REQ_THREAD_SPINNING 0
|
||||
#define IHK_SCD_REQ_THREAD_TO_BE_WOKEN 1
|
||||
#define IHK_SCD_REQ_THREAD_DESCHEDULED 2
|
||||
|
||||
struct syscall_response {
|
||||
/* TID of the thread that requested the service */
|
||||
int ttid;
|
||||
/* TID of the mcexec thread that is serving or has served the request */
|
||||
int stid;
|
||||
unsigned long status;
|
||||
unsigned long req_thread_status;
|
||||
long ret;
|
||||
unsigned long fault_address;
|
||||
unsigned long fault_reason;
|
||||
@ -156,4 +186,24 @@ struct signal_desc {
|
||||
char info[128];
|
||||
};
|
||||
|
||||
struct newprocess_desc {
|
||||
int pid;
|
||||
};
|
||||
|
||||
struct sys_mount_desc {
|
||||
char *dev_name;
|
||||
char *dir_name;
|
||||
char *type;
|
||||
unsigned long flags;
|
||||
void *data;
|
||||
};
|
||||
|
||||
struct sys_umount_desc {
|
||||
char *dir_name;
|
||||
};
|
||||
|
||||
struct sys_unshare_desc {
|
||||
unsigned long unshare_flags;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,25 +0,0 @@
|
||||
KDIR ?= @KDIR@
|
||||
ARCH ?= @ARCH@
|
||||
src = @abs_srcdir@
|
||||
KMODDIR=@KMODDIR@
|
||||
IHK_BASE=$(src)/../../../ihk
|
||||
|
||||
obj-m += mcctrl.o
|
||||
|
||||
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/include -I$(src)/../include
|
||||
|
||||
mcctrl-y := driver.o control.o ikc.o syscall.o procfs.o
|
||||
|
||||
KBUILD_EXTRA_SYMBOLS = @abs_builddir@/../../../ihk/linux/core/Module.symvers
|
||||
|
||||
.PHONY: clean install modules
|
||||
|
||||
modules:
|
||||
$(MAKE) -C $(KDIR) M=$(PWD) SUBDIRS=$(PWD) ARCH=$(ARCH) modules
|
||||
|
||||
clean:
|
||||
$(RM) .*.cmd *.mod.c *.o *.ko* Module.symvers modules.order -r .tmp*
|
||||
|
||||
install:
|
||||
mkdir -p -m 755 $(KMODDIR)
|
||||
install -m 644 mcctrl.ko $(KMODDIR)
|
||||
@ -1,916 +0,0 @@
|
||||
/**
|
||||
* \file executer/kernel/control.c
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* kernel module control
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Balazs Gerofi <bgerofi@riken.jp> \par
|
||||
* Copyright (C) 2012 RIKEN AICS
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
|
||||
* \author Tomoki Shirasawa <tomoki.shirasawa.kk@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
|
||||
* \author Balazs Gerofi <bgerofi@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2013 The University of Tokyo
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
* 2013/09/02 shirasawa add terminate thread
|
||||
* 2013/08/19 shirasawa mcexec forward signal to MIC process
|
||||
* 2013/08/07 nakamura add page fault forwarding
|
||||
* 2013/07/05 shirasawa propagate error code for prepare image
|
||||
* 2013/07/02 shirasawa add error handling for prepare_process
|
||||
* 2013/04/17 nakamura add generic system call forwarding
|
||||
*/
|
||||
#include <linux/sched.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/file.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/delay.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/io.h>
|
||||
#include "mcctrl.h"
|
||||
|
||||
#ifdef DEBUG
|
||||
#define dprintk printk
|
||||
#else
|
||||
#define dprintk(...)
|
||||
#endif
|
||||
|
||||
//static DECLARE_WAIT_QUEUE_HEAD(wq_prepare);
|
||||
//extern struct mcctrl_channel *channels;
|
||||
int mcctrl_ikc_set_recv_cpu(ihk_os_t os, int cpu);
|
||||
|
||||
static long mcexec_prepare_image(ihk_os_t os,
|
||||
struct program_load_desc * __user udesc)
|
||||
{
|
||||
struct program_load_desc desc, *pdesc;
|
||||
struct ikc_scd_packet isp;
|
||||
void *args, *envs;
|
||||
long ret = 0;
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
unsigned long flags;
|
||||
struct mcctrl_per_proc_data *ppd = NULL;
|
||||
|
||||
if (copy_from_user(&desc, udesc,
|
||||
sizeof(struct program_load_desc))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
if (desc.num_sections <= 0 || desc.num_sections > 16) {
|
||||
printk("# of sections: %d\n", desc.num_sections);
|
||||
return -EINVAL;
|
||||
}
|
||||
pdesc = kmalloc(sizeof(struct program_load_desc) +
|
||||
sizeof(struct program_image_section)
|
||||
* desc.num_sections, GFP_KERNEL);
|
||||
memcpy(pdesc, &desc, sizeof(struct program_load_desc));
|
||||
if (copy_from_user(pdesc->sections, udesc->sections,
|
||||
sizeof(struct program_image_section)
|
||||
* desc.num_sections)) {
|
||||
kfree(pdesc);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
pdesc->pid = task_tgid_vnr(current);
|
||||
|
||||
if (reserve_user_space(usrdata, &pdesc->user_start, &pdesc->user_end)) {
|
||||
kfree(pdesc);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
args = kmalloc(pdesc->args_len, GFP_KERNEL);
|
||||
if (copy_from_user(args, pdesc->args, pdesc->args_len)) {
|
||||
kfree(args);
|
||||
kfree(pdesc);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
envs = kmalloc(pdesc->envs_len, GFP_KERNEL);
|
||||
if (copy_from_user(envs, pdesc->envs, pdesc->envs_len)) {
|
||||
ret = -EFAULT;
|
||||
goto free_out;
|
||||
}
|
||||
|
||||
pdesc->args = (void*)virt_to_phys(args);
|
||||
printk("args: 0x%lX\n", (unsigned long)pdesc->args);
|
||||
printk("argc: %d\n", *(int*)args);
|
||||
pdesc->envs = (void*)virt_to_phys(envs);
|
||||
printk("envs: 0x%lX\n", (unsigned long)pdesc->envs);
|
||||
printk("envc: %d\n", *(int*)envs);
|
||||
|
||||
isp.msg = SCD_MSG_PREPARE_PROCESS;
|
||||
isp.ref = pdesc->cpu;
|
||||
isp.arg = virt_to_phys(pdesc);
|
||||
|
||||
printk("# of sections: %d\n", pdesc->num_sections);
|
||||
printk("%p (%lx)\n", pdesc, isp.arg);
|
||||
|
||||
pdesc->status = 0;
|
||||
mcctrl_ikc_send(os, pdesc->cpu, &isp);
|
||||
|
||||
wait_event_interruptible(usrdata->wq_prepare, pdesc->status);
|
||||
|
||||
if(pdesc->err < 0){
|
||||
ret = pdesc->err;
|
||||
goto free_out;
|
||||
}
|
||||
|
||||
ppd = kmalloc(sizeof(*ppd), GFP_ATOMIC);
|
||||
if (!ppd) {
|
||||
printk("ERROR: allocating per process data\n");
|
||||
ret = -ENOMEM;
|
||||
goto free_out;
|
||||
}
|
||||
|
||||
ppd->pid = pdesc->pid;
|
||||
ppd->rpgtable = pdesc->rpgtable;
|
||||
|
||||
flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock);
|
||||
list_add_tail(&ppd->list, &usrdata->per_proc_list);
|
||||
ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags);
|
||||
|
||||
dprintk("pid %d, rpgtable: 0x%lx added\n",
|
||||
ppd->pid, ppd->rpgtable);
|
||||
|
||||
if (copy_to_user(udesc, pdesc, sizeof(struct program_load_desc) +
|
||||
sizeof(struct program_image_section) * desc.num_sections)) {
|
||||
ret = -EFAULT;
|
||||
goto free_out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
free_out:
|
||||
kfree(args);
|
||||
kfree(pdesc);
|
||||
kfree(envs);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int mcexec_transfer_image(ihk_os_t os, struct remote_transfer *__user upt)
|
||||
{
|
||||
struct remote_transfer pt;
|
||||
unsigned long phys, ret = 0;
|
||||
void *rpm;
|
||||
#if 0
|
||||
unsigned long dma_status = 0;
|
||||
ihk_dma_channel_t channel;
|
||||
struct ihk_dma_request request;
|
||||
void *p;
|
||||
|
||||
channel = ihk_device_get_dma_channel(ihk_os_to_dev(os), 0);
|
||||
if (!channel) {
|
||||
return -EINVAL;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (copy_from_user(&pt, upt, sizeof(pt))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (pt.size > PAGE_SIZE) {
|
||||
printk("mcexec_transfer_image(): ERROR: size exceeds PAGE_SIZE\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
phys = ihk_device_map_memory(ihk_os_to_dev(os), pt.rphys, PAGE_SIZE);
|
||||
#ifdef CONFIG_MIC
|
||||
rpm = ioremap_wc(phys, PAGE_SIZE);
|
||||
#else
|
||||
rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys, PAGE_SIZE, NULL, 0);
|
||||
#endif
|
||||
|
||||
if (pt.direction == MCEXEC_UP_TRANSFER_TO_REMOTE) {
|
||||
if (copy_from_user(rpm, pt.userp, pt.size)) {
|
||||
ret = -EFAULT;
|
||||
}
|
||||
}
|
||||
else if (pt.direction == MCEXEC_UP_TRANSFER_FROM_REMOTE) {
|
||||
if (copy_to_user(pt.userp, rpm, pt.size)) {
|
||||
ret = -EFAULT;
|
||||
}
|
||||
}
|
||||
else {
|
||||
printk("mcexec_transfer_image(): ERROR: invalid direction\n");
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MIC
|
||||
iounmap(rpm);
|
||||
#else
|
||||
ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm, PAGE_SIZE);
|
||||
#endif
|
||||
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, PAGE_SIZE);
|
||||
|
||||
return ret;
|
||||
|
||||
#if 0
|
||||
p = (void *)__get_free_page(GFP_KERNEL);
|
||||
|
||||
if (copy_from_user(p, pt.src, PAGE_SIZE)) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
memset(&request, 0, sizeof(request));
|
||||
request.src_os = NULL;
|
||||
request.src_phys = virt_to_phys(p);
|
||||
request.dest_os = os;
|
||||
request.dest_phys = pt.dest;
|
||||
request.size = PAGE_SIZE;
|
||||
request.notify = (void *)virt_to_phys(&dma_status);
|
||||
request.priv = (void *)1;
|
||||
|
||||
ihk_dma_request(channel, &request);
|
||||
|
||||
while (!dma_status) {
|
||||
mb();
|
||||
udelay(1);
|
||||
}
|
||||
|
||||
free_page((unsigned long)p);
|
||||
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
//extern unsigned long last_thread_exec;
|
||||
|
||||
static long mcexec_start_image(ihk_os_t os,
|
||||
struct program_load_desc * __user udesc)
|
||||
{
|
||||
struct program_load_desc desc;
|
||||
struct ikc_scd_packet isp;
|
||||
struct mcctrl_channel *c;
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
|
||||
if (copy_from_user(&desc, udesc,
|
||||
sizeof(struct program_load_desc))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
c = usrdata->channels + desc.cpu;
|
||||
|
||||
mcctrl_ikc_set_recv_cpu(os, desc.cpu);
|
||||
|
||||
usrdata->last_thread_exec = desc.cpu;
|
||||
|
||||
isp.msg = SCD_MSG_SCHEDULE_PROCESS;
|
||||
isp.ref = desc.cpu;
|
||||
isp.arg = desc.rprocess;
|
||||
|
||||
mcctrl_ikc_send(os, desc.cpu, &isp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static DECLARE_WAIT_QUEUE_HEAD(signalq);
|
||||
|
||||
static long mcexec_send_signal(ihk_os_t os, struct signal_desc *sigparam)
|
||||
{
|
||||
struct ikc_scd_packet isp;
|
||||
struct mcctrl_channel *c;
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
struct signal_desc sig;
|
||||
struct mcctrl_signal msig[2];
|
||||
struct mcctrl_signal *msigp;
|
||||
int rc;
|
||||
|
||||
if (copy_from_user(&sig, sigparam, sizeof(struct signal_desc))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
msigp = msig;
|
||||
if(((unsigned long)msig & 0xfffffffffffff000L) !=
|
||||
((unsigned long)(msig + 1) & 0xfffffffffffff000L))
|
||||
msigp++;
|
||||
memset(msigp, '\0', sizeof msig);
|
||||
msigp->sig = sig.sig;
|
||||
msigp->pid = sig.pid;
|
||||
msigp->tid = sig.tid;
|
||||
memcpy(&msigp->info, &sig.info, 128);
|
||||
|
||||
c = usrdata->channels;
|
||||
isp.msg = SCD_MSG_SEND_SIGNAL;
|
||||
isp.ref = sig.cpu;
|
||||
isp.pid = sig.pid;
|
||||
isp.arg = virt_to_phys(msigp);
|
||||
|
||||
if((rc = mcctrl_ikc_send(os, sig.cpu, &isp)) < 0){
|
||||
printk("mcexec_send_signal: mcctrl_ikc_send ret=%d\n", rc);
|
||||
return rc;
|
||||
}
|
||||
wait_event_interruptible(signalq, msigp->cond != 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
sig_done(unsigned long arg, int err)
|
||||
{
|
||||
struct mcctrl_signal *msigp;
|
||||
|
||||
msigp = phys_to_virt(arg);
|
||||
msigp->cond = 1;
|
||||
wake_up_interruptible(&signalq);
|
||||
}
|
||||
|
||||
static long mcexec_get_cpu(ihk_os_t os)
|
||||
{
|
||||
struct ihk_cpu_info *info;
|
||||
|
||||
info = ihk_os_get_cpu_info(os);
|
||||
if (!info) {
|
||||
printk("Error: cannot retrieve CPU info.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (info->n_cpus < 1) {
|
||||
printk("Error: # of cpu is invalid.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return info->n_cpus;
|
||||
}
|
||||
|
||||
int mcexec_syscall(struct mcctrl_channel *c, int pid, unsigned long arg)
|
||||
{
|
||||
struct wait_queue_head_list_node *wqhln = NULL;
|
||||
struct wait_queue_head_list_node *wqhln_iter;
|
||||
unsigned long flags;
|
||||
|
||||
/* Look up per-process wait queue head with pid */
|
||||
flags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
|
||||
list_for_each_entry(wqhln_iter, &c->wq_list, list) {
|
||||
if (wqhln_iter->pid == pid) {
|
||||
wqhln = wqhln_iter;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!wqhln) {
|
||||
retry_alloc:
|
||||
wqhln = kmalloc(sizeof(*wqhln), GFP_ATOMIC);
|
||||
if (!wqhln) {
|
||||
printk("WARNING: coudln't alloc wait queue head, retrying..\n");
|
||||
goto retry_alloc;
|
||||
}
|
||||
|
||||
wqhln->pid = pid;
|
||||
wqhln->req = 0;
|
||||
init_waitqueue_head(&wqhln->wq_syscall);
|
||||
list_add_tail(&wqhln->list, &c->wq_list);
|
||||
}
|
||||
ihk_ikc_spinlock_unlock(&c->wq_list_lock, flags);
|
||||
|
||||
wqhln->req = 1;
|
||||
wake_up(&wqhln->wq_syscall);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifndef DO_USER_MODE
|
||||
// static int remaining_job, base_cpu, job_pos;
|
||||
#endif
|
||||
|
||||
// extern int num_channels;
|
||||
// extern int mcctrl_dma_abort;
|
||||
|
||||
int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req)
|
||||
{
|
||||
struct syscall_wait_desc swd;
|
||||
struct mcctrl_channel *c;
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
struct wait_queue_head_list_node *wqhln;
|
||||
struct wait_queue_head_list_node *wqhln_iter;
|
||||
int ret = 0;
|
||||
unsigned long irqflags;
|
||||
#ifndef DO_USER_MODE
|
||||
unsigned long s, w, d;
|
||||
#endif
|
||||
|
||||
//printk("mcexec_wait_syscall swd=%p req=%p size=%d\n", &swd, req, sizeof(swd.cpu));
|
||||
if (copy_from_user(&swd, req, sizeof(swd))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (swd.cpu >= usrdata->num_channels)
|
||||
return -EINVAL;
|
||||
|
||||
c = get_peer_channel(usrdata, current);
|
||||
if (c) {
|
||||
printk("mcexec_wait_syscall:already registered. task %p ch %p\n",
|
||||
current, c);
|
||||
return -EBUSY;
|
||||
}
|
||||
c = usrdata->channels + swd.cpu;
|
||||
|
||||
#ifdef DO_USER_MODE
|
||||
retry:
|
||||
/* Prepare per-process wait queue head */
|
||||
retry_alloc:
|
||||
wqhln = kmalloc(sizeof(*wqhln), GFP_KERNEL);
|
||||
if (!wqhln) {
|
||||
printk("WARNING: coudln't alloc wait queue head, retrying..\n");
|
||||
goto retry_alloc;
|
||||
}
|
||||
|
||||
wqhln->pid = swd.pid;
|
||||
wqhln->req = 0;
|
||||
init_waitqueue_head(&wqhln->wq_syscall);
|
||||
|
||||
irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
|
||||
/* First see if there is one wait queue already */
|
||||
list_for_each_entry(wqhln_iter, &c->wq_list, list) {
|
||||
if (wqhln_iter->pid == current->tgid) {
|
||||
kfree(wqhln);
|
||||
wqhln = wqhln_iter;
|
||||
list_del(&wqhln->list);
|
||||
break;
|
||||
}
|
||||
}
|
||||
list_add_tail(&wqhln->list, &c->wq_list);
|
||||
ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags);
|
||||
|
||||
ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req);
|
||||
|
||||
if (ret) {
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
/* Remove per-process wait queue head */
|
||||
irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
|
||||
list_del(&wqhln->list);
|
||||
ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags);
|
||||
kfree(wqhln);
|
||||
|
||||
if (c->param.request_va->number == 61 &&
|
||||
c->param.request_va->args[0] == swd.pid) {
|
||||
|
||||
dprintk("pid: %d, tid: %d: SC %d, swd.cpu: %d, WARNING: wait4() for self?\n",
|
||||
current->tgid,
|
||||
current->pid,
|
||||
c->param.request_va->number,
|
||||
swd.cpu);
|
||||
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
#if 1
|
||||
mb();
|
||||
if (!c->param.request_va->valid) {
|
||||
printk("mcexec_wait_syscall:stray wakeup\n");
|
||||
goto retry;
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
while (1) {
|
||||
c = usrdata->channels + swd.cpu;
|
||||
rdtscll(s);
|
||||
if (!usrdata->remaining_job) {
|
||||
while (!(*c->param.doorbell_va)) {
|
||||
mb();
|
||||
cpu_relax();
|
||||
rdtscll(w);
|
||||
if (w > s + 1024UL * 1024 * 1024 * 10) {
|
||||
return -EINTR;
|
||||
}
|
||||
}
|
||||
d = (*c->param.doorbell_va) - 1;
|
||||
*c->param.doorbell_va = 0;
|
||||
|
||||
if (d < 0 || d >= usrdata->num_channels) {
|
||||
d = 0;
|
||||
}
|
||||
usrdata->base_cpu = d;
|
||||
usrdata->job_pos = 0;
|
||||
usrdata->remaining_job = 1;
|
||||
} else {
|
||||
usrdata->job_pos++;
|
||||
}
|
||||
|
||||
for (; usrdata->job_pos < usrdata->num_channels; usrdata->job_pos++) {
|
||||
if (base_cpu + job_pos >= num_channels) {
|
||||
c = usrdata->channels +
|
||||
(usrdata->base_cpu + usrdata->job_pos - usrdata->num_channels);
|
||||
} else {
|
||||
c = usrdata->channels + usrdata->base_cpu + usrdata->job_pos;
|
||||
}
|
||||
if (!c) {
|
||||
continue;
|
||||
}
|
||||
if (c->param.request_va &&
|
||||
c->param.request_va->valid) {
|
||||
#endif
|
||||
c->param.request_va->valid = 0; /* ack */
|
||||
dprintk("SC #%lx, %lx\n",
|
||||
c->param.request_va->number,
|
||||
c->param.request_va->args[0]);
|
||||
register_peer_channel(usrdata, current, c);
|
||||
if (__do_in_kernel_syscall(os, c, c->param.request_va)) {
|
||||
if (copy_to_user(&req->sr, c->param.request_va,
|
||||
sizeof(struct syscall_request))) {
|
||||
deregister_peer_channel(usrdata, current, c);
|
||||
return -EFAULT;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
deregister_peer_channel(usrdata, current, c);
|
||||
#ifdef DO_USER_MODE
|
||||
goto retry;
|
||||
#endif
|
||||
#ifndef DO_USER_MODE
|
||||
if (usrdata->mcctrl_dma_abort) {
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
}
|
||||
usrdata->remaining_job = 0;
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
long mcexec_pin_region(ihk_os_t os, unsigned long *__user arg)
|
||||
{
|
||||
struct prepare_dma_desc desc;
|
||||
int pin_shift = 16;
|
||||
int order;
|
||||
unsigned long a;
|
||||
|
||||
if (copy_from_user(&desc, arg, sizeof(struct prepare_dma_desc))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
order = pin_shift - PAGE_SHIFT;
|
||||
if(desc.size > 0){
|
||||
order = get_order (desc.size);
|
||||
}
|
||||
|
||||
a = __get_free_pages(GFP_KERNEL, order);
|
||||
if (!a) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
a = virt_to_phys((void *)a);
|
||||
|
||||
if (copy_to_user((void*)desc.pa, &a, sizeof(unsigned long))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
long mcexec_free_region(ihk_os_t os, unsigned long *__user arg)
|
||||
{
|
||||
struct free_dma_desc desc;
|
||||
int pin_shift = 16;
|
||||
int order;
|
||||
|
||||
if (copy_from_user(&desc, arg, sizeof(struct free_dma_desc))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
order = pin_shift - PAGE_SHIFT;
|
||||
if(desc.size > 0){
|
||||
order = get_order (desc.size);
|
||||
}
|
||||
|
||||
if(desc.pa > 0){
|
||||
free_pages((unsigned long)phys_to_virt(desc.pa), order);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
long mcexec_load_syscall(ihk_os_t os, struct syscall_load_desc *__user arg)
|
||||
{
|
||||
struct syscall_load_desc desc;
|
||||
unsigned long phys;
|
||||
void *rpm;
|
||||
|
||||
if (copy_from_user(&desc, arg, sizeof(struct syscall_load_desc))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
phys = ihk_device_map_memory(ihk_os_to_dev(os), desc.src, desc.size);
|
||||
#ifdef CONFIG_MIC
|
||||
rpm = ioremap_wc(phys, desc.size);
|
||||
#else
|
||||
rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys, desc.size, NULL, 0);
|
||||
#endif
|
||||
|
||||
dprintk("mcexec_load_syscall: %s (desc.size: %d)\n", rpm, desc.size);
|
||||
|
||||
if (copy_to_user((void *__user)desc.dest, rpm, desc.size)) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MIC
|
||||
iounmap(rpm);
|
||||
#else
|
||||
ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm, desc.size);
|
||||
#endif
|
||||
|
||||
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, desc.size);
|
||||
|
||||
/*
|
||||
ihk_dma_channel_t channel;
|
||||
struct ihk_dma_request request;
|
||||
unsigned long dma_status = 0;
|
||||
|
||||
channel = ihk_device_get_dma_channel(ihk_os_to_dev(os), 0);
|
||||
if (!channel) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memset(&request, 0, sizeof(request));
|
||||
request.src_os = os;
|
||||
request.src_phys = desc.src;
|
||||
request.dest_os = NULL;
|
||||
request.dest_phys = desc.dest;
|
||||
request.size = desc.size;
|
||||
request.notify = (void *)virt_to_phys(&dma_status);
|
||||
request.priv = (void *)1;
|
||||
|
||||
ihk_dma_request(channel, &request);
|
||||
|
||||
while (!dma_status) {
|
||||
mb();
|
||||
udelay(1);
|
||||
}
|
||||
*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg)
|
||||
{
|
||||
struct syscall_ret_desc ret;
|
||||
struct mcctrl_channel *mc;
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
#if 0
|
||||
ihk_dma_channel_t channel;
|
||||
struct ihk_dma_request request;
|
||||
|
||||
channel = ihk_device_get_dma_channel(ihk_os_to_dev(os), 0);
|
||||
if (!channel) {
|
||||
return -EINVAL;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (copy_from_user(&ret, arg, sizeof(struct syscall_ret_desc))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
mc = usrdata->channels + ret.cpu;
|
||||
if (!mc) {
|
||||
return -EINVAL;
|
||||
}
|
||||
deregister_peer_channel(usrdata, current, mc);
|
||||
|
||||
mc->param.response_va->ret = ret.ret;
|
||||
|
||||
if (ret.size > 0) {
|
||||
/* Host => Accel. Write is fast. */
|
||||
unsigned long phys;
|
||||
void *rpm;
|
||||
|
||||
phys = ihk_device_map_memory(ihk_os_to_dev(os), ret.dest,
|
||||
ret.size);
|
||||
#ifdef CONFIG_MIC
|
||||
rpm = ioremap_wc(phys, ret.size);
|
||||
#else
|
||||
rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys,
|
||||
ret.size, NULL, 0);
|
||||
#endif
|
||||
|
||||
if (copy_from_user(rpm, (void *__user)ret.src, ret.size)) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
mb();
|
||||
mc->param.response_va->status = 1;
|
||||
|
||||
#ifdef CONFIG_MIC
|
||||
iounmap(rpm);
|
||||
#else
|
||||
ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm, ret.size);
|
||||
#endif
|
||||
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, ret.size);
|
||||
|
||||
/*
|
||||
memset(&request, 0, sizeof(request));
|
||||
request.src_os = NULL;
|
||||
request.src_phys = ret.src;
|
||||
request.dest_os = os;
|
||||
request.dest_phys = ret.dest;
|
||||
request.size = ret.size;
|
||||
request.notify_os = os;
|
||||
request.notify = (void *)mc->param.response_rpa;
|
||||
request.priv = (void *)1;
|
||||
|
||||
ihk_dma_request(channel, &request);
|
||||
*/
|
||||
} else {
|
||||
mb();
|
||||
mc->param.response_va->status = 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
LIST_HEAD(mckernel_exec_files);
|
||||
spinlock_t mckernel_exec_file_lock = SPIN_LOCK_UNLOCKED;
|
||||
|
||||
|
||||
struct mckernel_exec_file {
|
||||
ihk_os_t os;
|
||||
pid_t pid;
|
||||
struct file *fp;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
int mcexec_open_exec(ihk_os_t os, char * __user filename)
|
||||
{
|
||||
struct file *file;
|
||||
struct mckernel_exec_file *mcef;
|
||||
struct mckernel_exec_file *mcef_iter;
|
||||
int retval;
|
||||
|
||||
file = open_exec(filename);
|
||||
retval = PTR_ERR(file);
|
||||
if (IS_ERR(file)) {
|
||||
goto out_return;
|
||||
}
|
||||
|
||||
mcef = kmalloc(sizeof(*mcef), GFP_KERNEL);
|
||||
if (!mcef) {
|
||||
retval = ENOMEM;
|
||||
goto out_put_file;
|
||||
}
|
||||
|
||||
spin_lock_irq(&mckernel_exec_file_lock);
|
||||
/* Find previous file (if exists) and drop it */
|
||||
list_for_each_entry(mcef_iter, &mckernel_exec_files, list) {
|
||||
if (mcef_iter->os == os && mcef_iter->pid == current->tgid) {
|
||||
allow_write_access(mcef_iter->fp);
|
||||
fput(mcef_iter->fp);
|
||||
list_del(&mcef_iter->list);
|
||||
kfree(mcef_iter);
|
||||
dprintk("%d open_exec dropped previous executable \n", (int)current->tgid);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Add new exec file to the list */
|
||||
mcef->os = os;
|
||||
mcef->pid = current->tgid;
|
||||
mcef->fp = file;
|
||||
list_add_tail(&mcef->list, &mckernel_exec_files);
|
||||
spin_unlock(&mckernel_exec_file_lock);
|
||||
|
||||
dprintk("%d open_exec and holding file: %s\n", (int)current->tgid, filename);
|
||||
return 0;
|
||||
|
||||
out_put_file:
|
||||
fput(file);
|
||||
|
||||
out_return:
|
||||
return -retval;
|
||||
}
|
||||
|
||||
|
||||
int mcexec_close_exec(ihk_os_t os)
|
||||
{
|
||||
struct mckernel_exec_file *mcef = NULL;
|
||||
int found = 0;
|
||||
|
||||
spin_lock_irq(&mckernel_exec_file_lock);
|
||||
list_for_each_entry(mcef, &mckernel_exec_files, list) {
|
||||
if (mcef->os == os && mcef->pid == current->tgid) {
|
||||
allow_write_access(mcef->fp);
|
||||
fput(mcef->fp);
|
||||
list_del(&mcef->list);
|
||||
kfree(mcef);
|
||||
found = 1;
|
||||
dprintk("%d close_exec dropped executable \n", (int)current->tgid);
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock(&mckernel_exec_file_lock);
|
||||
|
||||
return (found ? 0 : EINVAL);
|
||||
}
|
||||
|
||||
long mcexec_strncpy_from_user(ihk_os_t os, struct strncpy_from_user_desc * __user arg)
|
||||
{
|
||||
struct strncpy_from_user_desc desc;
|
||||
void *buf;
|
||||
void *dest;
|
||||
void *src;
|
||||
unsigned long remain;
|
||||
long want;
|
||||
long copied;
|
||||
|
||||
if (copy_from_user(&desc, arg, sizeof(desc))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
buf = (void *)__get_free_page(GFP_KERNEL);
|
||||
if (!buf) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
dest = desc.dest;
|
||||
src = desc.src;
|
||||
remain = desc.n;
|
||||
want = 0;
|
||||
copied = 0;
|
||||
|
||||
while ((remain > 0) && (want == copied)) {
|
||||
want = (remain > PAGE_SIZE)? PAGE_SIZE: remain;
|
||||
copied = strncpy_from_user(buf, src, want);
|
||||
if (copied == want) {
|
||||
if (copy_to_user(dest, buf, copied)) {
|
||||
copied = -EFAULT;
|
||||
}
|
||||
}
|
||||
else if (copied >= 0) {
|
||||
if (copy_to_user(dest, buf, copied+1)) {
|
||||
copied = -EFAULT;
|
||||
}
|
||||
}
|
||||
dest += copied;
|
||||
src += copied;
|
||||
remain -= copied;
|
||||
}
|
||||
|
||||
desc.result = (copied >= 0)? (desc.n - remain): copied;
|
||||
free_page((unsigned long)buf);
|
||||
|
||||
if (copy_to_user(arg, &desc, sizeof(*arg))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg)
|
||||
{
|
||||
switch (req) {
|
||||
case MCEXEC_UP_PREPARE_IMAGE:
|
||||
return mcexec_prepare_image(os,
|
||||
(struct program_load_desc *)arg);
|
||||
case MCEXEC_UP_TRANSFER:
|
||||
return mcexec_transfer_image(os, (struct remote_transfer *)arg);
|
||||
|
||||
case MCEXEC_UP_START_IMAGE:
|
||||
return mcexec_start_image(os, (struct program_load_desc *)arg);
|
||||
|
||||
case MCEXEC_UP_WAIT_SYSCALL:
|
||||
return mcexec_wait_syscall(os, (struct syscall_wait_desc *)arg);
|
||||
|
||||
case MCEXEC_UP_RET_SYSCALL:
|
||||
return mcexec_ret_syscall(os, (struct syscall_ret_desc *)arg);
|
||||
|
||||
case MCEXEC_UP_LOAD_SYSCALL:
|
||||
return mcexec_load_syscall(os, (struct syscall_load_desc *)arg);
|
||||
|
||||
case MCEXEC_UP_SEND_SIGNAL:
|
||||
return mcexec_send_signal(os, (struct signal_desc *)arg);
|
||||
|
||||
case MCEXEC_UP_GET_CPU:
|
||||
return mcexec_get_cpu(os);
|
||||
|
||||
case MCEXEC_UP_STRNCPY_FROM_USER:
|
||||
return mcexec_strncpy_from_user(os,
|
||||
(struct strncpy_from_user_desc *)arg);
|
||||
|
||||
case MCEXEC_UP_OPEN_EXEC:
|
||||
return mcexec_open_exec(os, (char *)arg);
|
||||
|
||||
case MCEXEC_UP_CLOSE_EXEC:
|
||||
return mcexec_close_exec(os);
|
||||
|
||||
case MCEXEC_UP_PREPARE_DMA:
|
||||
return mcexec_pin_region(os, (unsigned long *)arg);
|
||||
|
||||
case MCEXEC_UP_FREE_DMA:
|
||||
return mcexec_free_region(os, (unsigned long *)arg);
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
void mcexec_prepare_ack(ihk_os_t os, unsigned long arg, int err)
|
||||
{
|
||||
struct program_load_desc *desc = phys_to_virt(arg);
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
|
||||
desc->err = err;
|
||||
desc->status = 1;
|
||||
|
||||
wake_up_all(&usrdata->wq_prepare);
|
||||
}
|
||||
|
||||
@ -1,135 +0,0 @@
|
||||
/**
|
||||
* \file executer/kernel/driver.c
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* kernel module entry
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Balazs Gerofi <bgerofi@riken.jp> \par
|
||||
* Copyright (C) 2012 RIKEN AICS
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
|
||||
* \author Tomoki Shirasawa <tomoki.shirasawa.kk@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
|
||||
* \author Balazs Gerofi <bgerofi@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2013 The University of Tokyo
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
* 2013/09/02 shirasawa add terminate thread
|
||||
* 2013/08/19 shirasawa mcexec forward signal to MIC process
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/slab.h>
|
||||
#include "mcctrl.h"
|
||||
|
||||
#define OS_MAX_MINOR 64
|
||||
|
||||
extern long __mcctrl_control(ihk_os_t, unsigned int, unsigned long);
|
||||
extern int prepare_ikc_channels(ihk_os_t os);
|
||||
extern void destroy_ikc_channels(ihk_os_t os);
|
||||
#ifndef DO_USER_MODE
|
||||
extern void mcctrl_syscall_init(void);
|
||||
#endif
|
||||
extern void procfs_init(int);
|
||||
extern void procfs_exit(int);
|
||||
|
||||
|
||||
static long mcctrl_ioctl(ihk_os_t os, unsigned int request, void *priv,
|
||||
unsigned long arg)
|
||||
{
|
||||
return __mcctrl_control(os, request, arg);
|
||||
}
|
||||
|
||||
static struct ihk_os_user_call_handler mcctrl_uchs[] = {
|
||||
{ .request = MCEXEC_UP_PREPARE_IMAGE, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_TRANSFER, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_START_IMAGE, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_WAIT_SYSCALL, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_RET_SYSCALL, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_LOAD_SYSCALL, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_SEND_SIGNAL, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_GET_CPU, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_STRNCPY_FROM_USER, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_PREPARE_DMA, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_FREE_DMA, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_OPEN_EXEC, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_CLOSE_EXEC, .func = mcctrl_ioctl },
|
||||
};
|
||||
|
||||
static struct ihk_os_user_call mcctrl_uc_proto = {
|
||||
.num_handlers = sizeof(mcctrl_uchs) / sizeof(mcctrl_uchs[0]),
|
||||
.handlers = mcctrl_uchs,
|
||||
};
|
||||
|
||||
static struct ihk_os_user_call mcctrl_uc[OS_MAX_MINOR];
|
||||
|
||||
static ihk_os_t os[OS_MAX_MINOR];
|
||||
|
||||
static int __init mcctrl_init(void)
|
||||
{
|
||||
int i;
|
||||
int rc;
|
||||
|
||||
rc = -ENOENT;
|
||||
for(i = 0; i < OS_MAX_MINOR; i++){
|
||||
os[i] = ihk_host_find_os(i, NULL);
|
||||
if (os[i]) {
|
||||
printk("OS #%d found.\n", i);
|
||||
rc = 0;
|
||||
}
|
||||
}
|
||||
if(rc){
|
||||
printk("OS not found.\n");
|
||||
return rc;
|
||||
}
|
||||
|
||||
for(i = 0; i < OS_MAX_MINOR; i++){
|
||||
if (os[i]) {
|
||||
if (prepare_ikc_channels(os[i]) != 0) {
|
||||
printk("Preparing syscall channels failed.\n");
|
||||
os[i] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef DO_USER_MODE
|
||||
mcctrl_syscall_init();
|
||||
#endif
|
||||
|
||||
for(i = 0; i < OS_MAX_MINOR; i++){
|
||||
if (os[i]) {
|
||||
memcpy(mcctrl_uc + i, &mcctrl_uc_proto, sizeof mcctrl_uc_proto);
|
||||
rc = ihk_os_register_user_call_handlers(os[i], mcctrl_uc + i);
|
||||
if(rc < 0){
|
||||
destroy_ikc_channels(os[i]);
|
||||
os[i] = NULL;
|
||||
}
|
||||
procfs_init(i);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit mcctrl_exit(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
printk("mcctrl: unregistered.\n");
|
||||
for(i = 0; i < OS_MAX_MINOR; i++){
|
||||
if(os[i]){
|
||||
ihk_os_unregister_user_call_handlers(os[i], mcctrl_uc + i);
|
||||
destroy_ikc_channels(os[i]);
|
||||
procfs_exit(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
module_init(mcctrl_init);
|
||||
module_exit(mcctrl_exit);
|
||||
@ -1,188 +0,0 @@
|
||||
/**
|
||||
* \file mcctrl.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* define data structure
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Balazs Gerofi <bgerofi@riken.jp> \par
|
||||
* Copyright (C) 2012 RIKEN AICS
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
|
||||
* \author Tomoki Shirasawa <tomoki.shirasawa.kk@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
|
||||
* \author Balazs Gerofi <bgerofi@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2013 The University of Tokyo
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
* 2013/11/07 hamada added <sys/resource.h> which is required by getrlimit(2)
|
||||
* 2013/10/21 nakamura exclude interpreter's segment from data region
|
||||
* 2013/10/11 nakamura mcexec: add a upper limit of the stack size
|
||||
* 2013/10/11 nakamura mcexec: add a path prefix for interpreter search
|
||||
* 2013/10/11 nakamura mcexec: add a interpreter invocation
|
||||
* 2013/10/08 nakamura add a AT_ENTRY entry to the auxiliary vector
|
||||
* 2013/09/02 shirasawa add terminate thread
|
||||
* 2013/08/19 shirasawa mcexec forward signal to MIC process
|
||||
* 2013/08/07 nakamura add page fault forwarding
|
||||
* 2013/07/26 shirasawa mcexec print signum or exit status
|
||||
* 2013/07/17 nakamura create more mcexec thread so that all cpu to be serviced
|
||||
* 2013/04/17 nakamura add generic system call forwarding
|
||||
*/
|
||||
#ifndef HEADER_MCCTRL_H
|
||||
#define HEADER_MCCTRL_H
|
||||
|
||||
#include <ihk/ihk_host_driver.h>
|
||||
#include <uprotocol.h>
|
||||
#include <linux/wait.h>
|
||||
#include <ihk/ikc.h>
|
||||
#include <ikc/master.h>
|
||||
|
||||
#define SCD_MSG_PREPARE_PROCESS 0x1
|
||||
#define SCD_MSG_PREPARE_PROCESS_ACKED 0x2
|
||||
#define SCD_MSG_PREPARE_PROCESS_NACKED 0x7
|
||||
#define SCD_MSG_SCHEDULE_PROCESS 0x3
|
||||
|
||||
#define SCD_MSG_INIT_CHANNEL 0x5
|
||||
#define SCD_MSG_INIT_CHANNEL_ACKED 0x6
|
||||
|
||||
#define SCD_MSG_SYSCALL_ONESIDE 0x4
|
||||
#define SCD_MSG_SEND_SIGNAL 0x8
|
||||
|
||||
#define SCD_MSG_PROCFS_CREATE 0x10
|
||||
#define SCD_MSG_PROCFS_DELETE 0x11
|
||||
#define SCD_MSG_PROCFS_REQUEST 0x12
|
||||
#define SCD_MSG_PROCFS_ANSWER 0x13
|
||||
|
||||
#define DMA_PIN_SHIFT 21
|
||||
|
||||
#define DO_USER_MODE
|
||||
|
||||
#define __NR_coredump 999
|
||||
|
||||
struct coretable {
|
||||
int len;
|
||||
unsigned long addr;
|
||||
};
|
||||
|
||||
struct ikc_scd_packet {
|
||||
int msg;
|
||||
int ref;
|
||||
int osnum;
|
||||
int pid;
|
||||
int err;
|
||||
unsigned long arg;
|
||||
};
|
||||
|
||||
struct mcctrl_priv {
|
||||
ihk_os_t os;
|
||||
struct program_load_desc *desc;
|
||||
};
|
||||
|
||||
struct ikc_scd_init_param {
|
||||
unsigned long request_page;
|
||||
unsigned long response_page;
|
||||
unsigned long doorbell_page;
|
||||
unsigned long post_page;
|
||||
};
|
||||
|
||||
struct syscall_post {
|
||||
unsigned long v[8];
|
||||
};
|
||||
|
||||
struct syscall_params {
|
||||
unsigned long request_pa;
|
||||
struct syscall_request *request_va;
|
||||
unsigned long response_rpa, response_pa;
|
||||
struct syscall_response *response_va;
|
||||
unsigned long post_pa;
|
||||
struct syscall_post *post_va;
|
||||
|
||||
unsigned long doorbell_pa;
|
||||
unsigned long *doorbell_va;
|
||||
};
|
||||
|
||||
struct wait_queue_head_list_node {
|
||||
struct list_head list;
|
||||
wait_queue_head_t wq_syscall;
|
||||
int pid;
|
||||
int req;
|
||||
};
|
||||
|
||||
struct mcctrl_channel {
|
||||
struct ihk_ikc_channel_desc *c;
|
||||
struct syscall_params param;
|
||||
struct ikc_scd_init_param init;
|
||||
void *dma_buf;
|
||||
|
||||
struct list_head wq_list;
|
||||
ihk_spinlock_t wq_list_lock;
|
||||
};
|
||||
|
||||
struct mcctrl_per_proc_data {
|
||||
struct list_head list;
|
||||
int pid;
|
||||
unsigned long rpgtable; /* per process, not per OS */
|
||||
};
|
||||
|
||||
struct mcctrl_usrdata {
|
||||
struct ihk_ikc_listen_param listen_param;
|
||||
struct ihk_ikc_listen_param listen_param2;
|
||||
ihk_os_t os;
|
||||
int num_channels;
|
||||
struct mcctrl_channel *channels;
|
||||
unsigned long *mcctrl_doorbell_va;
|
||||
unsigned long mcctrl_doorbell_pa;
|
||||
int remaining_job;
|
||||
int base_cpu;
|
||||
int job_pos;
|
||||
int mcctrl_dma_abort;
|
||||
unsigned long last_thread_exec;
|
||||
wait_queue_head_t wq_prepare;
|
||||
|
||||
struct list_head per_proc_list;
|
||||
ihk_spinlock_t per_proc_list_lock;
|
||||
void **keys;
|
||||
};
|
||||
|
||||
struct mcctrl_signal {
|
||||
int cond;
|
||||
int sig;
|
||||
int pid;
|
||||
int tid;
|
||||
char info[128];
|
||||
};
|
||||
|
||||
int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp);
|
||||
int mcctrl_ikc_send_msg(ihk_os_t os, int cpu, int msg, int ref, unsigned long arg);
|
||||
int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu);
|
||||
int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp,
|
||||
unsigned long *endp);
|
||||
|
||||
/* syscall.c */
|
||||
int init_peer_channel_registry(struct mcctrl_usrdata *ud);
|
||||
int register_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch);
|
||||
int deregister_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch);
|
||||
struct mcctrl_channel *get_peer_channel(struct mcctrl_usrdata *ud, void *key);
|
||||
int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc);
|
||||
|
||||
#define PROCFS_NAME_MAX 1000
|
||||
|
||||
struct procfs_read {
|
||||
unsigned long pbuf; /* physical address of the host buffer (request) */
|
||||
unsigned long offset; /* offset to read (request) */
|
||||
int count; /* bytes to read (request) */
|
||||
int eof; /* if eof is detected, 1 otherwise 0. (answer)*/
|
||||
int ret; /* read bytes (answer) */
|
||||
int status; /* non-zero if done (answer) */
|
||||
int newcpu; /* migrated new cpu (answer) */
|
||||
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
|
||||
};
|
||||
|
||||
struct procfs_file {
|
||||
int status; /* status of processing (answer) */
|
||||
int mode; /* file mode (request) */
|
||||
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
|
||||
};
|
||||
|
||||
#endif
|
||||
27
executer/kernel/mcctrl/Makefile.in
Normal file
27
executer/kernel/mcctrl/Makefile.in
Normal file
@ -0,0 +1,27 @@
|
||||
KDIR ?= @KDIR@
|
||||
ARCH ?= @ARCH@
|
||||
src = @abs_srcdir@
|
||||
KMODDIR=@KMODDIR@
|
||||
BINDIR=@BINDIR@
|
||||
IHK_BASE=$(src)/../../../../ihk
|
||||
|
||||
obj-m += mcctrl.o
|
||||
|
||||
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/linux/include/ihk/arch/$(ARCH) -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/ikc/include/ikc/arch/$(ARCH) -I$(IHK_BASE)/include -I$(IHK_BASE)/include/arch/$(ARCH) -I$(src)/../../include -mcmodel=kernel -mno-red-zone -DMCEXEC_PATH=\"$(BINDIR)/mcexec\" -I@abs_builddir@
|
||||
|
||||
mcctrl-y := driver.o control.o ikc.o syscall.o procfs.o binfmt_mcexec.o
|
||||
mcctrl-y += sysfs.o sysfs_files.o arch/$(ARCH)/archdeps.o
|
||||
|
||||
KBUILD_EXTRA_SYMBOLS = @abs_builddir@/../../../../ihk/linux/core/Module.symvers
|
||||
|
||||
.PHONY: clean install modules
|
||||
|
||||
modules:
|
||||
$(MAKE) -C $(KDIR) M=$(PWD) SUBDIRS=$(PWD) ARCH=$(ARCH) modules
|
||||
|
||||
clean:
|
||||
$(RM) .*.cmd *.mod.c *.o *.ko* Module.symvers modules.order -r .tmp*
|
||||
|
||||
install:
|
||||
mkdir -p -m 755 $(KMODDIR)
|
||||
install -m 644 mcctrl.ko $(KMODDIR)
|
||||
1
executer/kernel/mcctrl/arch/x86_64/Makefile.in
Normal file
1
executer/kernel/mcctrl/arch/x86_64/Makefile.in
Normal file
@ -0,0 +1 @@
|
||||
# dummy file
|
||||
192
executer/kernel/mcctrl/arch/x86_64/archdeps.c
Normal file
192
executer/kernel/mcctrl/arch/x86_64/archdeps.c
Normal file
@ -0,0 +1,192 @@
|
||||
#include <linux/version.h>
|
||||
#include "../../config.h"
|
||||
#include "../../mcctrl.h"
|
||||
|
||||
#ifdef MCCTRL_KSYM_vdso_image_64
|
||||
#if MCCTRL_KSYM_vdso_image_64
|
||||
struct vdso_image *vdso_image = (void *)MCCTRL_KSYM_vdso_image_64;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef MCCTRL_KSYM_vdso_start
|
||||
#if MCCTRL_KSYM_vdso_start
|
||||
void *vdso_start = (void *)MCCTRL_KSYM_vdso_start;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef MCCTRL_KSYM_vdso_end
|
||||
#if MCCTRL_KSYM_vdso_end
|
||||
void *vdso_end = (void *)MCCTRL_KSYM_vdso_end;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef MCCTRL_KSYM_vdso_pages
|
||||
#if MCCTRL_KSYM_vdso_pages
|
||||
struct page **vdso_pages = (void *)MCCTRL_KSYM_vdso_pages;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef MCCTRL_KSYM___vvar_page
|
||||
#if MCCTRL_KSYM___vvar_page
|
||||
void *__vvar_page = (void *)MCCTRL_KSYM___vvar_page;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
long *hpet_addressp
|
||||
#ifdef MCCTRL_KSYM_hpet_address
|
||||
#if MCCTRL_KSYM_hpet_address
|
||||
= (void *)MCCTRL_KSYM_hpet_address;
|
||||
#else
|
||||
= &hpet_address;
|
||||
#endif
|
||||
#else
|
||||
= NULL;
|
||||
#endif
|
||||
|
||||
void **hv_clockp
|
||||
#ifdef MCCTRL_KSYM_hv_clock
|
||||
#if MCCTRL_KSYM_hv_clock
|
||||
= (void *)MCCTRL_KSYM_hv_clock;
|
||||
#else
|
||||
= &hv_clock;
|
||||
#endif
|
||||
#else
|
||||
= NULL;
|
||||
#endif
|
||||
|
||||
unsigned long
|
||||
reserve_user_space_common(struct mcctrl_usrdata *usrdata, unsigned long start, unsigned long end);
|
||||
|
||||
int
|
||||
reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, unsigned long *endp)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long start = 0L;
|
||||
unsigned long end;
|
||||
|
||||
#define DESIRED_USER_END 0x800000000000
|
||||
#define GAP_FOR_MCEXEC 0x008000000000UL
|
||||
end = DESIRED_USER_END;
|
||||
down_write(¤t->mm->mmap_sem);
|
||||
vma = find_vma(current->mm, 0);
|
||||
if (vma) {
|
||||
end = (vma->vm_start - GAP_FOR_MCEXEC) & ~(GAP_FOR_MCEXEC - 1);
|
||||
}
|
||||
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
|
||||
up_write(¤t->mm->mmap_sem);
|
||||
#endif
|
||||
start = reserve_user_space_common(usrdata, start, end);
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
|
||||
up_write(¤t->mm->mmap_sem);
|
||||
#endif
|
||||
|
||||
if (IS_ERR_VALUE(start)) {
|
||||
return start;
|
||||
}
|
||||
*startp = start;
|
||||
*endp = end;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void get_vdso_info(ihk_os_t os, long vdso_rpa)
|
||||
{
|
||||
ihk_device_t dev = ihk_os_to_dev(os);
|
||||
long vdso_pa;
|
||||
struct vdso *vdso;
|
||||
size_t size;
|
||||
int i;
|
||||
|
||||
vdso_pa = ihk_device_map_memory(dev, vdso_rpa, sizeof(*vdso));
|
||||
vdso = ihk_device_map_virtual(dev, vdso_pa, sizeof(*vdso), NULL, 0);
|
||||
|
||||
/* VDSO pages */
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
|
||||
size = vdso_image->size;
|
||||
vdso->vdso_npages = size >> PAGE_SHIFT;
|
||||
|
||||
if (vdso->vdso_npages > VDSO_MAXPAGES) {
|
||||
vdso->vdso_npages = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < vdso->vdso_npages; ++i) {
|
||||
vdso->vdso_physlist[i] = virt_to_phys(
|
||||
vdso_image->data + (i * PAGE_SIZE));
|
||||
}
|
||||
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
|
||||
size = vdso_end - vdso_start;
|
||||
size = (size + PAGE_SIZE - 1) & PAGE_MASK;
|
||||
|
||||
vdso->vdso_npages = size >> PAGE_SHIFT;
|
||||
if (vdso->vdso_npages > VDSO_MAXPAGES) {
|
||||
vdso->vdso_npages = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < vdso->vdso_npages; ++i) {
|
||||
vdso->vdso_physlist[i] = page_to_phys(vdso_pages[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* VVAR page */
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
|
||||
vdso->vvar_is_global = 0;
|
||||
vdso->vvar_virt = (void *)(-3 * PAGE_SIZE);
|
||||
vdso->vvar_phys = virt_to_phys(__vvar_page);
|
||||
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)
|
||||
vdso->vvar_is_global = 0;
|
||||
vdso->vvar_virt = (void *)(-2 * PAGE_SIZE);
|
||||
vdso->vvar_phys = virt_to_phys(__vvar_page);
|
||||
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
|
||||
vdso->vvar_is_global = 0;
|
||||
vdso->vvar_virt = (void *)(vdso->vdso_npages * PAGE_SIZE);
|
||||
vdso->vvar_phys = virt_to_phys(__vvar_page);
|
||||
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,1,0)
|
||||
vdso->vvar_is_global = 1;
|
||||
vdso->vvar_virt = (void *)fix_to_virt(VVAR_PAGE);
|
||||
vdso->vvar_phys = virt_to_phys(__vvar_page);
|
||||
#endif
|
||||
|
||||
/* HPET page */
|
||||
if (hpet_addressp && *hpet_addressp) {
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
|
||||
vdso->hpet_is_global = 0;
|
||||
vdso->hpet_virt = (void *)(-2 * PAGE_SIZE);
|
||||
vdso->hpet_phys = *hpet_addressp;
|
||||
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)
|
||||
vdso->hpet_is_global = 0;
|
||||
vdso->hpet_virt = (void *)(-1 * PAGE_SIZE);
|
||||
vdso->hpet_phys = *hpet_addressp;
|
||||
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
|
||||
vdso->hpet_is_global = 0;
|
||||
vdso->hpet_virt = (void *)((vdso->vdso_npages + 1) * PAGE_SIZE);
|
||||
vdso->hpet_phys = *hpet_addressp;
|
||||
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
|
||||
vdso->hpet_is_global = 1;
|
||||
vdso->hpet_virt = (void *)fix_to_virt(VSYSCALL_HPET);
|
||||
vdso->hpet_phys = *hpet_addressp;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* struct pvlock_vcpu_time_info table */
|
||||
if (hv_clockp && *hv_clockp) {
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
|
||||
vdso->pvti_is_global = 0;
|
||||
vdso->pvti_virt = (void *)(-1 * PAGE_SIZE);
|
||||
vdso->pvti_phys = virt_to_phys(*hv_clockp);
|
||||
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0)
|
||||
vdso->pvti_is_global = 1;
|
||||
vdso->pvti_virt = (void *)fix_to_virt(PVCLOCK_FIXMAP_BEGIN);
|
||||
vdso->pvti_phys = virt_to_phys(*hv_clockp);
|
||||
#endif
|
||||
}
|
||||
|
||||
out:
|
||||
wmb();
|
||||
vdso->busy = 0;
|
||||
|
||||
ihk_device_unmap_virtual(dev, vdso, sizeof(*vdso));
|
||||
ihk_device_unmap_memory(dev, vdso_pa, sizeof(*vdso));
|
||||
return;
|
||||
} /* get_vdso_info() */
|
||||
282
executer/kernel/mcctrl/binfmt_mcexec.c
Normal file
282
executer/kernel/mcctrl/binfmt_mcexec.c
Normal file
@ -0,0 +1,282 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/stat.h>
|
||||
#include <linux/binfmts.h>
|
||||
#include <linux/elfcore.h>
|
||||
#include <linux/elf.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/version.h>
|
||||
#include "mcctrl.h"
|
||||
|
||||
static int pathcheck(const char *file, const char *list)
|
||||
{
|
||||
const char *p;
|
||||
const char *q;
|
||||
const char *r;
|
||||
int l;
|
||||
|
||||
if(!*list)
|
||||
return 1;
|
||||
p = list;
|
||||
do{
|
||||
q = strchr(p, ':');
|
||||
if(!q)
|
||||
q = strchr(p, '\0');
|
||||
for(r = q - 1; r >= p && *r == '/'; r--);
|
||||
l = r - p + 1;
|
||||
|
||||
if(!strncmp(file, p, l) &&
|
||||
file[l] == '/')
|
||||
return 1;
|
||||
|
||||
p = q + 1;
|
||||
} while(*q);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int load_elf(struct linux_binprm *bprm
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
|
||||
, struct pt_regs *regs
|
||||
#endif
|
||||
)
|
||||
{
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
|
||||
const
|
||||
#endif
|
||||
char *wp;
|
||||
char *cp;
|
||||
struct file *file;
|
||||
int rc;
|
||||
struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
|
||||
typedef struct {
|
||||
char *name;
|
||||
char *val;
|
||||
int l;
|
||||
} envdata;
|
||||
envdata env[] = {
|
||||
{.name = "MCEXEC_WL"},
|
||||
#define env_mcexec_wl (env[0].val)
|
||||
{.name = NULL}
|
||||
};
|
||||
envdata *ep;
|
||||
unsigned long off = 0;
|
||||
struct page *page;
|
||||
char *addr = NULL;
|
||||
int i;
|
||||
unsigned long p;
|
||||
int st;
|
||||
int mode;
|
||||
int cnt[2];
|
||||
char buf[32];
|
||||
int l;
|
||||
int pass;
|
||||
char *pbuf;
|
||||
const char *path;
|
||||
|
||||
if(bprm->envc == 0)
|
||||
return -ENOEXEC;
|
||||
if(memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
|
||||
return -ENOEXEC;
|
||||
if(elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
|
||||
return -ENOEXEC;
|
||||
|
||||
if(elf_ex->e_ident[EI_CLASS] != ELFCLASS64)
|
||||
return -ENOEXEC;
|
||||
|
||||
pbuf = kmalloc(1024, GFP_ATOMIC);
|
||||
if (!pbuf) {
|
||||
printk("%s: error: allocating pbuf\n", __FUNCTION__);
|
||||
return -ENOMEM;
|
||||
}
|
||||
path = d_path(&bprm->file->f_path, pbuf, 1024);
|
||||
if(!path || IS_ERR(path))
|
||||
path = bprm->interp;
|
||||
|
||||
cp = strrchr(path, '/');
|
||||
if(!cp ||
|
||||
!strcmp(cp, "/mcexec") ||
|
||||
!strcmp(cp, "/ihkosctl") ||
|
||||
!strcmp(cp, "/ihkconfig")) {
|
||||
kfree(pbuf);
|
||||
return -ENOEXEC;
|
||||
}
|
||||
|
||||
cnt[0] = bprm->argc;
|
||||
cnt[1] = bprm->envc;
|
||||
for(pass = 0; pass < 2; pass++){
|
||||
p = bprm->p;
|
||||
mode = cnt[0] == 0? 1: 0;
|
||||
if(pass == 1){
|
||||
for(ep = env; ep->name; ep++){
|
||||
if(ep->l)
|
||||
ep->val = kmalloc(ep->l, GFP_KERNEL);
|
||||
}
|
||||
}
|
||||
ep = NULL;
|
||||
l = 0;
|
||||
for(i = 0, st = 0; mode != 2;){
|
||||
if(st == 0){
|
||||
off = p & ~PAGE_MASK;
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0)
|
||||
rc = get_user_pages_remote(current, bprm->mm,
|
||||
bprm->p, 1, 0, 1,
|
||||
&page, NULL);
|
||||
#else
|
||||
rc = get_user_pages(current, bprm->mm,
|
||||
bprm->p, 1, 0, 1,
|
||||
&page, NULL);
|
||||
#endif
|
||||
if(rc <= 0) {
|
||||
kfree(pbuf);
|
||||
return -EFAULT;
|
||||
}
|
||||
addr = kmap_atomic(page
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
|
||||
, KM_USER0
|
||||
#endif
|
||||
);
|
||||
st = 1;
|
||||
}
|
||||
if(addr[off]){
|
||||
if(mode == 1){
|
||||
if(ep){
|
||||
if(pass == 1)
|
||||
ep->val[l] = addr[off];
|
||||
l++;
|
||||
}
|
||||
else if(addr[off] == '='){
|
||||
if(l < 32)
|
||||
buf[l] = '\0';
|
||||
buf[31] = '\0';
|
||||
for(ep = env; ep->name; ep++)
|
||||
if(!strcmp(ep->name, buf))
|
||||
break;
|
||||
if(ep->name)
|
||||
l = 0;
|
||||
else
|
||||
ep = NULL;
|
||||
}
|
||||
else{
|
||||
if(l < 32)
|
||||
buf[l] = addr[off];
|
||||
l++;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
else{
|
||||
if(mode == 1 && ep){
|
||||
if(pass == 0){
|
||||
ep->l = l + 1;
|
||||
}
|
||||
else{
|
||||
ep->val[l] = '\0';
|
||||
}
|
||||
}
|
||||
ep = NULL;
|
||||
l = 0;
|
||||
i++;
|
||||
if(i == cnt[mode]){
|
||||
i = 0;
|
||||
mode++;
|
||||
}
|
||||
}
|
||||
off++;
|
||||
p++;
|
||||
if(off == PAGE_SIZE || mode == 2){
|
||||
kunmap_atomic(addr
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
|
||||
, KM_USER0
|
||||
#endif
|
||||
);
|
||||
put_page(page);
|
||||
st = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(env_mcexec_wl)
|
||||
rc = !pathcheck(path, env_mcexec_wl);
|
||||
else
|
||||
rc = 1;
|
||||
|
||||
for(ep = env; ep->name; ep++)
|
||||
if(ep->val)
|
||||
kfree(ep->val);
|
||||
if(rc) {
|
||||
kfree(pbuf);
|
||||
return -ENOEXEC;
|
||||
}
|
||||
|
||||
file = open_exec(MCEXEC_PATH);
|
||||
if (IS_ERR(file)) {
|
||||
kfree(pbuf);
|
||||
return -ENOEXEC;
|
||||
}
|
||||
|
||||
rc = remove_arg_zero(bprm);
|
||||
if (rc){
|
||||
fput(file);
|
||||
kfree(pbuf);
|
||||
return rc;
|
||||
}
|
||||
rc = copy_strings_kernel(1, &bprm->interp, bprm);
|
||||
if (rc < 0){
|
||||
fput(file);
|
||||
kfree(pbuf);
|
||||
return rc;
|
||||
}
|
||||
bprm->argc++;
|
||||
wp = MCEXEC_PATH;
|
||||
rc = copy_strings_kernel(1, &wp, bprm);
|
||||
if (rc){
|
||||
fput(file);
|
||||
kfree(pbuf);
|
||||
return rc;
|
||||
}
|
||||
bprm->argc++;
|
||||
rc = bprm_change_interp(MCEXEC_PATH, bprm);
|
||||
if (rc < 0){
|
||||
fput(file);
|
||||
kfree(pbuf);
|
||||
return rc;
|
||||
}
|
||||
|
||||
allow_write_access(bprm->file);
|
||||
fput(bprm->file);
|
||||
bprm->file = file;
|
||||
|
||||
rc = prepare_binprm(bprm);
|
||||
if (rc < 0){
|
||||
kfree(pbuf);
|
||||
return rc;
|
||||
}
|
||||
|
||||
kfree(pbuf);
|
||||
|
||||
return search_binary_handler(bprm
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
|
||||
, regs
|
||||
#endif
|
||||
);
|
||||
}
|
||||
|
||||
static struct linux_binfmt mcexec_format = {
|
||||
.module = THIS_MODULE,
|
||||
.load_binary = load_elf,
|
||||
};
|
||||
|
||||
void __init binfmt_mcexec_init(void)
|
||||
{
|
||||
insert_binfmt(&mcexec_format);
|
||||
}
|
||||
|
||||
void binfmt_mcexec_exit(void)
|
||||
{
|
||||
unregister_binfmt(&mcexec_format);
|
||||
}
|
||||
1333
executer/kernel/mcctrl/control.c
Normal file
1333
executer/kernel/mcctrl/control.c
Normal file
File diff suppressed because it is too large
Load Diff
203
executer/kernel/mcctrl/driver.c
Normal file
203
executer/kernel/mcctrl/driver.c
Normal file
@ -0,0 +1,203 @@
|
||||
/**
|
||||
* \file executer/kernel/driver.c
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* kernel module entry
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Balazs Gerofi <bgerofi@riken.jp> \par
|
||||
* Copyright (C) 2012 RIKEN AICS
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
|
||||
* \author Tomoki Shirasawa <tomoki.shirasawa.kk@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
|
||||
* \author Balazs Gerofi <bgerofi@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2013 The University of Tokyo
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
* 2013/09/02 shirasawa add terminate thread
|
||||
* 2013/08/19 shirasawa mcexec forward signal to MIC process
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/device.h>
|
||||
#include "mcctrl.h"
|
||||
|
||||
#define OS_MAX_MINOR 64
|
||||
|
||||
extern long __mcctrl_control(ihk_os_t, unsigned int, unsigned long,
|
||||
struct file *);
|
||||
extern int prepare_ikc_channels(ihk_os_t os);
|
||||
extern void destroy_ikc_channels(ihk_os_t os);
|
||||
#ifndef DO_USER_MODE
|
||||
extern void mcctrl_syscall_init(void);
|
||||
#endif
|
||||
extern void procfs_init(int);
|
||||
extern void procfs_exit(int);
|
||||
|
||||
extern void rus_page_hash_init(void);
|
||||
extern void rus_page_hash_put_pages(void);
|
||||
extern void binfmt_mcexec_init(void);
|
||||
extern void binfmt_mcexec_exit(void);
|
||||
|
||||
static long mcctrl_ioctl(ihk_os_t os, unsigned int request, void *priv,
|
||||
unsigned long arg, struct file *file)
|
||||
{
|
||||
return __mcctrl_control(os, request, arg, file);
|
||||
}
|
||||
|
||||
static struct ihk_os_user_call_handler mcctrl_uchs[] = {
|
||||
{ .request = MCEXEC_UP_PREPARE_IMAGE, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_TRANSFER, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_START_IMAGE, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_WAIT_SYSCALL, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_RET_SYSCALL, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_LOAD_SYSCALL, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_SEND_SIGNAL, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_GET_CPU, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_GET_NODES, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_STRNCPY_FROM_USER, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_NEW_PROCESS, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_PREPARE_DMA, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_FREE_DMA, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_OPEN_EXEC, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_CLOSE_EXEC, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_GET_CRED, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_GET_CREDV, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_SYS_MOUNT, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_SYS_UMOUNT, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_SYS_UNSHARE, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl },
|
||||
};
|
||||
|
||||
static struct ihk_os_user_call mcctrl_uc_proto = {
|
||||
.num_handlers = sizeof(mcctrl_uchs) / sizeof(mcctrl_uchs[0]),
|
||||
.handlers = mcctrl_uchs,
|
||||
};
|
||||
|
||||
static struct ihk_os_user_call mcctrl_uc[OS_MAX_MINOR];
|
||||
|
||||
static ihk_os_t os[OS_MAX_MINOR];
|
||||
|
||||
ihk_os_t osnum_to_os(int n)
|
||||
{
|
||||
return os[n];
|
||||
}
|
||||
|
||||
/* OS event notifier implementation */
|
||||
int mcctrl_os_boot_notifier(int os_index)
|
||||
{
|
||||
int rc;
|
||||
|
||||
os[os_index] = ihk_host_find_os(os_index, NULL);
|
||||
if (!os[os_index]) {
|
||||
printk("mcctrl: error: OS ID %d couldn't be found\n", os_index);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (prepare_ikc_channels(os[os_index]) != 0) {
|
||||
printk("mcctrl: error: preparing IKC channels for OS %d\n", os_index);
|
||||
|
||||
os[os_index] = NULL;
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
memcpy(mcctrl_uc + os_index, &mcctrl_uc_proto, sizeof mcctrl_uc_proto);
|
||||
|
||||
rc = ihk_os_register_user_call_handlers(os[os_index], mcctrl_uc + os_index);
|
||||
if (rc < 0) {
|
||||
destroy_ikc_channels(os[os_index]);
|
||||
printk("mcctrl: error: registering callbacks for OS %d\n", os_index);
|
||||
|
||||
goto error_cleanup_channels;
|
||||
}
|
||||
|
||||
procfs_init(os_index);
|
||||
printk("mcctrl: OS ID %d boot event handled\n", os_index);
|
||||
|
||||
return 0;
|
||||
|
||||
error_cleanup_channels:
|
||||
destroy_ikc_channels(os[os_index]);
|
||||
|
||||
os[os_index] = NULL;
|
||||
return rc;
|
||||
}
|
||||
|
||||
int mcctrl_os_shutdown_notifier(int os_index)
|
||||
{
|
||||
if (os[os_index]) {
|
||||
sysfsm_cleanup(os[os_index]);
|
||||
free_topology_info(os[os_index]);
|
||||
ihk_os_unregister_user_call_handlers(os[os_index], mcctrl_uc + os_index);
|
||||
destroy_ikc_channels(os[os_index]);
|
||||
procfs_exit(os_index);
|
||||
}
|
||||
|
||||
os[os_index] = NULL;
|
||||
|
||||
printk("mcctrl: OS ID %d shutdown event handled\n", os_index);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct ihk_os_notifier_ops mcctrl_os_notifier_ops = {
|
||||
.boot = mcctrl_os_boot_notifier,
|
||||
.shutdown = mcctrl_os_shutdown_notifier,
|
||||
};
|
||||
|
||||
static struct ihk_os_notifier mcctrl_os_notifier = {
|
||||
.ops = &mcctrl_os_notifier_ops,
|
||||
};
|
||||
|
||||
static int __init mcctrl_init(void)
|
||||
{
|
||||
int ret = 0;
|
||||
int i;
|
||||
|
||||
#ifndef DO_USER_MODE
|
||||
mcctrl_syscall_init();
|
||||
#endif
|
||||
|
||||
for (i = 0; i < OS_MAX_MINOR; ++i) {
|
||||
os[i] = NULL;
|
||||
}
|
||||
|
||||
rus_page_hash_init();
|
||||
|
||||
binfmt_mcexec_init();
|
||||
|
||||
if ((ret = ihk_host_register_os_notifier(&mcctrl_os_notifier)) != 0) {
|
||||
printk("mcctrl: error: registering OS notifier\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
printk("mcctrl: initialized successfully.\n");
|
||||
return ret;
|
||||
|
||||
error:
|
||||
binfmt_mcexec_exit();
|
||||
rus_page_hash_put_pages();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __exit mcctrl_exit(void)
|
||||
{
|
||||
if (ihk_host_deregister_os_notifier(&mcctrl_os_notifier) != 0) {
|
||||
printk("mcctrl: warning: failed to deregister OS notifier??\n");
|
||||
}
|
||||
|
||||
binfmt_mcexec_exit();
|
||||
rus_page_hash_put_pages();
|
||||
|
||||
printk("mcctrl: unregistered.\n");
|
||||
}
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
module_init(mcctrl_init);
|
||||
module_exit(mcctrl_exit);
|
||||
@ -27,6 +27,7 @@
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include "mcctrl.h"
|
||||
#ifdef ATTACHED_MIC
|
||||
#include <sysdeps/mic/mic/micconst.h>
|
||||
@ -34,25 +35,34 @@
|
||||
|
||||
#define REQUEST_SHIFT 16
|
||||
|
||||
//#define DEBUG_IKC
|
||||
|
||||
#ifdef DEBUG_IKC
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#else
|
||||
#define dkprintf(...) do { if (0) printk(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) printk(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
//int num_channels;
|
||||
|
||||
//struct mcctrl_channel *channels;
|
||||
|
||||
void mcexec_prepare_ack(ihk_os_t os, unsigned long arg, int err);
|
||||
static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c);
|
||||
int mcexec_syscall(struct mcctrl_channel *c, int pid, unsigned long arg);
|
||||
void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg);
|
||||
void procfs_delete(void *__os, int osnum, unsigned long arg);
|
||||
void procfs_answer(unsigned long arg, int err);
|
||||
int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet);
|
||||
void sig_done(unsigned long arg, int err);
|
||||
|
||||
/* XXX: this runs in atomic context! */
|
||||
static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
void *__packet, void *__os)
|
||||
{
|
||||
struct ikc_scd_packet *pisp = __packet;
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(__os);
|
||||
int msg = pisp->msg;
|
||||
|
||||
switch (pisp->msg) {
|
||||
switch (msg) {
|
||||
case SCD_MSG_INIT_CHANNEL:
|
||||
mcctrl_ikc_init(__os, pisp->ref, pisp->arg, c);
|
||||
break;
|
||||
@ -66,15 +76,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
break;
|
||||
|
||||
case SCD_MSG_SYSCALL_ONESIDE:
|
||||
mcexec_syscall(usrdata->channels + pisp->ref, pisp->pid, pisp->arg);
|
||||
break;
|
||||
|
||||
case SCD_MSG_PROCFS_CREATE:
|
||||
procfs_create(__os, pisp->ref, pisp->osnum, pisp->pid, pisp->arg);
|
||||
break;
|
||||
|
||||
case SCD_MSG_PROCFS_DELETE:
|
||||
procfs_delete(__os, pisp->osnum, pisp->arg);
|
||||
mcexec_syscall(usrdata, pisp);
|
||||
break;
|
||||
|
||||
case SCD_MSG_PROCFS_ANSWER:
|
||||
@ -84,6 +86,43 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
case SCD_MSG_SEND_SIGNAL:
|
||||
sig_done(pisp->arg, pisp->err);
|
||||
break;
|
||||
|
||||
case SCD_MSG_SYSFS_REQ_CREATE:
|
||||
case SCD_MSG_SYSFS_REQ_MKDIR:
|
||||
case SCD_MSG_SYSFS_REQ_SYMLINK:
|
||||
case SCD_MSG_SYSFS_REQ_LOOKUP:
|
||||
case SCD_MSG_SYSFS_REQ_UNLINK:
|
||||
case SCD_MSG_SYSFS_REQ_SETUP:
|
||||
case SCD_MSG_SYSFS_RESP_SHOW:
|
||||
case SCD_MSG_SYSFS_RESP_STORE:
|
||||
case SCD_MSG_SYSFS_RESP_RELEASE:
|
||||
sysfsm_packet_handler(__os, pisp->msg, pisp->err,
|
||||
pisp->sysfs_arg1, pisp->sysfs_arg2);
|
||||
break;
|
||||
|
||||
case SCD_MSG_PROCFS_TID_CREATE:
|
||||
case SCD_MSG_PROCFS_TID_DELETE:
|
||||
procfsm_packet_handler(__os, pisp->msg, pisp->pid, pisp->arg);
|
||||
break;
|
||||
|
||||
case SCD_MSG_GET_VDSO_INFO:
|
||||
get_vdso_info(__os, pisp->arg);
|
||||
break;
|
||||
|
||||
default:
|
||||
printk(KERN_ERR "mcctrl:syscall_packet_handler:"
|
||||
"unknown message (%d.%d.%d.%d.%d.%#lx)\n",
|
||||
pisp->msg, pisp->ref, pisp->osnum, pisp->pid,
|
||||
pisp->err, pisp->arg);
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* SCD_MSG_SYSCALL_ONESIDE holds the packet and frees is it
|
||||
* mcexec_ret_syscall(), for the rest, free it here.
|
||||
*/
|
||||
if (msg != SCD_MSG_SYSCALL_ONESIDE) {
|
||||
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)__packet, c);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -121,8 +160,6 @@ int mcctrl_ikc_set_recv_cpu(ihk_os_t os, int cpu)
|
||||
|
||||
ihk_ikc_channel_set_cpu(usrdata->channels[cpu].c,
|
||||
ihk_ikc_get_processor_id());
|
||||
kprintf("Setting the target to %d\n",
|
||||
ihk_ikc_get_processor_id());
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -137,91 +174,26 @@ int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu)
|
||||
}
|
||||
}
|
||||
|
||||
//unsigned long *mcctrl_doorbell_va;
|
||||
//unsigned long mcctrl_doorbell_pa;
|
||||
|
||||
static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c)
|
||||
{
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
struct ikc_scd_packet packet;
|
||||
struct mcctrl_channel *pmc = usrdata->channels + cpu;
|
||||
unsigned long phys;
|
||||
struct ikc_scd_init_param *rpm;
|
||||
|
||||
if(c->port == 502)
|
||||
if (c->port == 502) {
|
||||
pmc = usrdata->channels + usrdata->num_channels - 1;
|
||||
|
||||
if (!pmc) {
|
||||
return;
|
||||
}
|
||||
|
||||
printk("IKC init: cpu=%d port=%d\n", cpu, c->port);
|
||||
|
||||
phys = ihk_device_map_memory(ihk_os_to_dev(os), rphys,
|
||||
sizeof(struct ikc_scd_init_param));
|
||||
#ifdef CONFIG_MIC
|
||||
rpm = ioremap_wc(phys, sizeof(struct ikc_scd_init_param));
|
||||
#else
|
||||
rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys,
|
||||
sizeof(struct ikc_scd_init_param),
|
||||
NULL, 0);
|
||||
#endif
|
||||
|
||||
pmc->param.request_va =
|
||||
(void *)__get_free_pages(GFP_KERNEL,
|
||||
REQUEST_SHIFT - PAGE_SHIFT);
|
||||
pmc->param.request_pa = virt_to_phys(pmc->param.request_va);
|
||||
pmc->param.doorbell_va = usrdata->mcctrl_doorbell_va;
|
||||
pmc->param.doorbell_pa = usrdata->mcctrl_doorbell_pa;
|
||||
pmc->param.post_va = (void *)__get_free_page(GFP_KERNEL);
|
||||
pmc->param.post_pa = virt_to_phys(pmc->param.post_va);
|
||||
memset(pmc->param.doorbell_va, 0, PAGE_SIZE);
|
||||
memset(pmc->param.request_va, 0, PAGE_SIZE);
|
||||
memset(pmc->param.post_va, 0, PAGE_SIZE);
|
||||
|
||||
pmc->param.response_rpa = rpm->response_page;
|
||||
pmc->param.response_pa
|
||||
= ihk_device_map_memory(ihk_os_to_dev(os),
|
||||
pmc->param.response_rpa,
|
||||
PAGE_SIZE);
|
||||
#ifdef CONFIG_MIC
|
||||
pmc->param.response_va = ioremap_cache(pmc->param.response_pa,
|
||||
PAGE_SIZE);
|
||||
#else
|
||||
pmc->param.response_va = ihk_device_map_virtual(ihk_os_to_dev(os),
|
||||
pmc->param.response_pa,
|
||||
PAGE_SIZE, NULL, 0);
|
||||
#endif
|
||||
|
||||
pmc->dma_buf = (void *)__get_free_pages(GFP_KERNEL,
|
||||
DMA_PIN_SHIFT - PAGE_SHIFT);
|
||||
|
||||
rpm->request_page = pmc->param.request_pa;
|
||||
rpm->doorbell_page = pmc->param.doorbell_pa;
|
||||
rpm->post_page = pmc->param.post_pa;
|
||||
if (!pmc) {
|
||||
kprintf("%s: error: no channel found?\n", __FUNCTION__);
|
||||
return;
|
||||
}
|
||||
|
||||
packet.msg = SCD_MSG_INIT_CHANNEL_ACKED;
|
||||
packet.ref = cpu;
|
||||
packet.arg = rphys;
|
||||
|
||||
printk("Request: %lx, Response: %lx, Doorbell: %lx\n",
|
||||
pmc->param.request_pa, pmc->param.response_rpa,
|
||||
pmc->param.doorbell_pa);
|
||||
printk("Request: %p, Response: %p, Doorbell: %p\n",
|
||||
pmc->param.request_va, pmc->param.response_va,
|
||||
pmc->param.doorbell_va);
|
||||
|
||||
ihk_ikc_send(pmc->c, &packet, 0);
|
||||
|
||||
#ifdef CONFIG_MIC
|
||||
iounmap(rpm);
|
||||
#else
|
||||
ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm,
|
||||
sizeof(struct ikc_scd_init_param));
|
||||
#endif
|
||||
|
||||
ihk_device_unmap_memory(ihk_os_to_dev(os), phys,
|
||||
sizeof(struct ikc_scd_init_param));
|
||||
}
|
||||
|
||||
static int connect_handler(struct ihk_ikc_channel_info *param)
|
||||
@ -240,11 +212,8 @@ static int connect_handler(struct ihk_ikc_channel_info *param)
|
||||
}
|
||||
param->packet_handler = syscall_packet_handler;
|
||||
|
||||
INIT_LIST_HEAD(&usrdata->channels[cpu].wq_list);
|
||||
spin_lock_init(&usrdata->channels[cpu].wq_list_lock);
|
||||
|
||||
usrdata->channels[cpu].c = c;
|
||||
kprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c);
|
||||
dkprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -261,11 +230,8 @@ static int connect_handler2(struct ihk_ikc_channel_info *param)
|
||||
|
||||
param->packet_handler = syscall_packet_handler;
|
||||
|
||||
INIT_LIST_HEAD(&usrdata->channels[cpu].wq_list);
|
||||
spin_lock_init(&usrdata->channels[cpu].wq_list_lock);
|
||||
|
||||
usrdata->channels[cpu].c = c;
|
||||
kprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c);
|
||||
dkprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -288,27 +254,29 @@ static struct ihk_ikc_listen_param listen_param2 = {
|
||||
|
||||
int prepare_ikc_channels(ihk_os_t os)
|
||||
{
|
||||
struct ihk_cpu_info *info;
|
||||
struct mcctrl_usrdata *usrdata;
|
||||
int error;
|
||||
struct mcctrl_usrdata *usrdata;
|
||||
int i;
|
||||
|
||||
usrdata = kzalloc(sizeof(struct mcctrl_usrdata), GFP_KERNEL);
|
||||
usrdata->mcctrl_doorbell_va = (void *)__get_free_page(GFP_KERNEL);
|
||||
usrdata->mcctrl_doorbell_pa = virt_to_phys(usrdata->mcctrl_doorbell_va);
|
||||
|
||||
info = ihk_os_get_cpu_info(os);
|
||||
if (!info) {
|
||||
printk("Error: cannot retrieve CPU info.\n");
|
||||
usrdata->cpu_info = ihk_os_get_cpu_info(os);
|
||||
usrdata->mem_info = ihk_os_get_memory_info(os);
|
||||
|
||||
if (!usrdata->cpu_info || !usrdata->mem_info) {
|
||||
printk("Error: cannot obtain OS CPU and memory information.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (info->n_cpus < 1) {
|
||||
|
||||
if (usrdata->cpu_info->n_cpus < 1) {
|
||||
printk("Error: # of cpu is invalid.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
usrdata->num_channels = info->n_cpus + 1;
|
||||
usrdata->channels = kzalloc(sizeof(struct mcctrl_channel) * usrdata->num_channels,
|
||||
GFP_KERNEL);
|
||||
usrdata->num_channels = usrdata->cpu_info->n_cpus + 1;
|
||||
usrdata->channels = kzalloc(sizeof(struct mcctrl_channel) *
|
||||
usrdata->num_channels,
|
||||
GFP_KERNEL);
|
||||
|
||||
if (!usrdata->channels) {
|
||||
printk("Error: cannot allocate channels.\n");
|
||||
return -ENOMEM;
|
||||
@ -322,33 +290,20 @@ int prepare_ikc_channels(ihk_os_t os)
|
||||
memcpy(&usrdata->listen_param2, &listen_param2, sizeof listen_param2);
|
||||
ihk_ikc_listen_port(os, &usrdata->listen_param2);
|
||||
|
||||
INIT_LIST_HEAD(&usrdata->per_proc_list);
|
||||
spin_lock_init(&usrdata->per_proc_list_lock);
|
||||
|
||||
error = init_peer_channel_registry(usrdata);
|
||||
if (error) {
|
||||
return error;
|
||||
for (i = 0; i < MCCTRL_PER_PROC_DATA_HASH_SIZE; ++i) {
|
||||
INIT_LIST_HEAD(&usrdata->per_proc_data_hash[i]);
|
||||
rwlock_init(&usrdata->per_proc_data_hash_lock[i]);
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&usrdata->cpu_topology_list);
|
||||
INIT_LIST_HEAD(&usrdata->node_topology_list);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __destroy_ikc_channel(ihk_os_t os, struct mcctrl_channel *pmc)
|
||||
{
|
||||
free_pages((unsigned long)pmc->param.request_va,
|
||||
REQUEST_SHIFT - PAGE_SHIFT);
|
||||
free_page((unsigned long)pmc->param.post_va);
|
||||
|
||||
#ifdef CONFIG_MIC
|
||||
iounmap(pmc->param.response_va);
|
||||
#else
|
||||
ihk_device_unmap_virtual(ihk_os_to_dev(os), pmc->param.response_va,
|
||||
PAGE_SIZE);
|
||||
#endif
|
||||
ihk_device_unmap_memory(ihk_os_to_dev(os),
|
||||
pmc->param.response_pa, PAGE_SIZE);
|
||||
free_pages((unsigned long)pmc->dma_buf,
|
||||
DMA_PIN_SHIFT - PAGE_SHIFT);
|
||||
return;
|
||||
}
|
||||
|
||||
void destroy_ikc_channels(ihk_os_t os)
|
||||
@ -356,6 +311,11 @@ void destroy_ikc_channels(ihk_os_t os)
|
||||
int i;
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
|
||||
if (!usrdata) {
|
||||
printk("%s: WARNING: no mcctrl_usrdata found\n", __FUNCTION__);
|
||||
return;
|
||||
}
|
||||
|
||||
ihk_host_os_set_usrdata(os, NULL);
|
||||
|
||||
for (i = 0; i < usrdata->num_channels; i++) {
|
||||
@ -366,7 +326,6 @@ void destroy_ikc_channels(ihk_os_t os)
|
||||
printk("Channel #%d freed.\n", i);
|
||||
}
|
||||
}
|
||||
free_page((unsigned long)usrdata->mcctrl_doorbell_va);
|
||||
|
||||
kfree(usrdata->channels);
|
||||
kfree(usrdata);
|
||||
389
executer/kernel/mcctrl/mcctrl.h
Normal file
389
executer/kernel/mcctrl/mcctrl.h
Normal file
@ -0,0 +1,389 @@
|
||||
/**
|
||||
* \file mcctrl.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* define data structure
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Balazs Gerofi <bgerofi@riken.jp> \par
|
||||
* Copyright (C) 2012 RIKEN AICS
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
|
||||
* \author Tomoki Shirasawa <tomoki.shirasawa.kk@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
|
||||
* \author Balazs Gerofi <bgerofi@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2013 The University of Tokyo
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
* 2013/11/07 hamada added <sys/resource.h> which is required by getrlimit(2)
|
||||
* 2013/10/21 nakamura exclude interpreter's segment from data region
|
||||
* 2013/10/11 nakamura mcexec: add a upper limit of the stack size
|
||||
* 2013/10/11 nakamura mcexec: add a path prefix for interpreter search
|
||||
* 2013/10/11 nakamura mcexec: add a interpreter invocation
|
||||
* 2013/10/08 nakamura add a AT_ENTRY entry to the auxiliary vector
|
||||
* 2013/09/02 shirasawa add terminate thread
|
||||
* 2013/08/19 shirasawa mcexec forward signal to MIC process
|
||||
* 2013/08/07 nakamura add page fault forwarding
|
||||
* 2013/07/26 shirasawa mcexec print signum or exit status
|
||||
* 2013/07/17 nakamura create more mcexec thread so that all cpu to be serviced
|
||||
* 2013/04/17 nakamura add generic system call forwarding
|
||||
*/
|
||||
#ifndef HEADER_MCCTRL_H
|
||||
#define HEADER_MCCTRL_H
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <ihk/ihk_host_driver.h>
|
||||
#include <linux/resource.h>
|
||||
#include <uprotocol.h>
|
||||
#include <linux/wait.h>
|
||||
#include <ihk/ikc.h>
|
||||
#include <ikc/master.h>
|
||||
#include <ihk/msr.h>
|
||||
#include <linux/semaphore.h>
|
||||
#include <linux/rwlock.h>
|
||||
#include <linux/threads.h>
|
||||
#include "sysfs.h"
|
||||
|
||||
#define SCD_MSG_PREPARE_PROCESS 0x1
|
||||
#define SCD_MSG_PREPARE_PROCESS_ACKED 0x2
|
||||
#define SCD_MSG_PREPARE_PROCESS_NACKED 0x7
|
||||
#define SCD_MSG_SCHEDULE_PROCESS 0x3
|
||||
#define SCD_MSG_WAKE_UP_SYSCALL_THREAD 0x14
|
||||
|
||||
#define SCD_MSG_INIT_CHANNEL 0x5
|
||||
#define SCD_MSG_INIT_CHANNEL_ACKED 0x6
|
||||
|
||||
#define SCD_MSG_SYSCALL_ONESIDE 0x4
|
||||
#define SCD_MSG_SEND_SIGNAL 0x8
|
||||
#define SCD_MSG_CLEANUP_PROCESS 0x9
|
||||
#define SCD_MSG_GET_VDSO_INFO 0xa
|
||||
|
||||
//#define SCD_MSG_GET_CPU_MAPPING 0xc
|
||||
//#define SCD_MSG_REPLY_GET_CPU_MAPPING 0xd
|
||||
|
||||
#define SCD_MSG_PROCFS_CREATE 0x10
|
||||
#define SCD_MSG_PROCFS_DELETE 0x11
|
||||
#define SCD_MSG_PROCFS_REQUEST 0x12
|
||||
#define SCD_MSG_PROCFS_ANSWER 0x13
|
||||
|
||||
#define SCD_MSG_DEBUG_LOG 0x20
|
||||
|
||||
#define SCD_MSG_SYSFS_REQ_CREATE 0x30
|
||||
/* #define SCD_MSG_SYSFS_RESP_CREATE 0x31 */
|
||||
#define SCD_MSG_SYSFS_REQ_MKDIR 0x32
|
||||
/* #define SCD_MSG_SYSFS_RESP_MKDIR 0x33 */
|
||||
#define SCD_MSG_SYSFS_REQ_SYMLINK 0x34
|
||||
/* #define SCD_MSG_SYSFS_RESP_SYMLINK 0x35 */
|
||||
#define SCD_MSG_SYSFS_REQ_LOOKUP 0x36
|
||||
/* #define SCD_MSG_SYSFS_RESP_LOOKUP 0x37 */
|
||||
#define SCD_MSG_SYSFS_REQ_UNLINK 0x38
|
||||
/* #define SCD_MSG_SYSFS_RESP_UNLINK 0x39 */
|
||||
#define SCD_MSG_SYSFS_REQ_SHOW 0x3a
|
||||
#define SCD_MSG_SYSFS_RESP_SHOW 0x3b
|
||||
#define SCD_MSG_SYSFS_REQ_STORE 0x3c
|
||||
#define SCD_MSG_SYSFS_RESP_STORE 0x3d
|
||||
#define SCD_MSG_SYSFS_REQ_RELEASE 0x3e
|
||||
#define SCD_MSG_SYSFS_RESP_RELEASE 0x3f
|
||||
#define SCD_MSG_SYSFS_REQ_SETUP 0x40
|
||||
#define SCD_MSG_SYSFS_RESP_SETUP 0x41
|
||||
/* #define SCD_MSG_SYSFS_REQ_CLEANUP 0x42 */
|
||||
/* #define SCD_MSG_SYSFS_RESP_CLEANUP 0x43 */
|
||||
#define SCD_MSG_PROCFS_TID_CREATE 0x44
|
||||
#define SCD_MSG_PROCFS_TID_DELETE 0x45
|
||||
|
||||
#define DMA_PIN_SHIFT 21
|
||||
|
||||
#define DO_USER_MODE
|
||||
|
||||
#define __NR_coredump 999
|
||||
|
||||
struct coretable {
|
||||
int len;
|
||||
unsigned long addr;
|
||||
};
|
||||
|
||||
struct ikc_scd_packet {
|
||||
int msg;
|
||||
int err;
|
||||
union {
|
||||
/* for traditional SCD_MSG_* */
|
||||
struct {
|
||||
int ref;
|
||||
int osnum;
|
||||
int pid;
|
||||
unsigned long arg;
|
||||
struct syscall_request req;
|
||||
unsigned long resp_pa;
|
||||
};
|
||||
|
||||
/* for SCD_MSG_SYSFS_* */
|
||||
struct {
|
||||
long sysfs_arg1;
|
||||
long sysfs_arg2;
|
||||
long sysfs_arg3;
|
||||
};
|
||||
|
||||
/* SCD_MSG_SCHEDULE_THREAD */
|
||||
struct {
|
||||
int ttid;
|
||||
};
|
||||
};
|
||||
char padding[12];
|
||||
};
|
||||
|
||||
struct mcctrl_priv {
|
||||
ihk_os_t os;
|
||||
struct program_load_desc *desc;
|
||||
};
|
||||
|
||||
struct ikc_scd_init_param {
|
||||
unsigned long request_page;
|
||||
unsigned long response_page;
|
||||
unsigned long doorbell_page;
|
||||
unsigned long post_page;
|
||||
};
|
||||
|
||||
struct syscall_post {
|
||||
unsigned long v[8];
|
||||
};
|
||||
|
||||
struct syscall_params {
|
||||
unsigned long request_pa;
|
||||
struct syscall_request *request_va;
|
||||
unsigned long response_rpa, response_pa;
|
||||
struct syscall_response *response_va;
|
||||
unsigned long post_pa;
|
||||
struct syscall_post *post_va;
|
||||
|
||||
unsigned long doorbell_pa;
|
||||
unsigned long *doorbell_va;
|
||||
};
|
||||
|
||||
struct wait_queue_head_list_node {
|
||||
struct list_head list;
|
||||
wait_queue_head_t wq_syscall;
|
||||
struct task_struct *task;
|
||||
/* Denotes an exclusive wait for requester TID rtid */
|
||||
int rtid;
|
||||
int req;
|
||||
struct ikc_scd_packet *packet;
|
||||
};
|
||||
|
||||
struct mcctrl_channel {
|
||||
struct ihk_ikc_channel_desc *c;
|
||||
struct ikc_scd_init_param init;
|
||||
void *dma_buf;
|
||||
};
|
||||
|
||||
struct mcctrl_per_thread_data {
|
||||
struct list_head hash;
|
||||
struct task_struct *task;
|
||||
void *data;
|
||||
};
|
||||
|
||||
#define MCCTRL_PER_THREAD_DATA_HASH_SHIFT 8
|
||||
#define MCCTRL_PER_THREAD_DATA_HASH_SIZE (1 << MCCTRL_PER_THREAD_DATA_HASH_SHIFT)
|
||||
#define MCCTRL_PER_THREAD_DATA_HASH_MASK (MCCTRL_PER_THREAD_DATA_HASH_SIZE - 1)
|
||||
|
||||
struct mcctrl_per_proc_data {
|
||||
struct list_head hash;
|
||||
int pid;
|
||||
unsigned long rpgtable; /* per process, not per OS */
|
||||
|
||||
struct list_head wq_list;
|
||||
struct list_head wq_req_list;
|
||||
struct list_head wq_list_exact;
|
||||
ihk_spinlock_t wq_list_lock;
|
||||
|
||||
struct list_head per_thread_data_hash[MCCTRL_PER_THREAD_DATA_HASH_SIZE];
|
||||
rwlock_t per_thread_data_hash_lock[MCCTRL_PER_THREAD_DATA_HASH_SIZE];
|
||||
};
|
||||
|
||||
struct sysfsm_req {
|
||||
int busy;
|
||||
int padding;
|
||||
long lresult;
|
||||
wait_queue_head_t wq;
|
||||
};
|
||||
|
||||
struct sysfsm_data {
|
||||
size_t sysfs_bufsize;
|
||||
void *sysfs_buf;
|
||||
long sysfs_buf_rpa;
|
||||
long sysfs_buf_pa;
|
||||
struct kobject *sysfs_kobj;
|
||||
struct sysfsm_node *sysfs_root;
|
||||
struct semaphore sysfs_tree_sem;
|
||||
struct semaphore sysfs_io_sem;
|
||||
struct sysfsm_req sysfs_req;
|
||||
ihk_os_t sysfs_os;
|
||||
};
|
||||
|
||||
static inline int sysfs_inited(struct sysfsm_data *sdp)
|
||||
{
|
||||
return !!(sdp->sysfs_buf);
|
||||
} /* sysfs_inited() */
|
||||
|
||||
struct cache_topology {
|
||||
struct ihk_cache_topology *saved;
|
||||
cpumask_t shared_cpu_map;
|
||||
|
||||
struct list_head chain;
|
||||
};
|
||||
|
||||
struct cpu_topology {
|
||||
//struct mcctrl_usrdata *udp;
|
||||
struct ihk_cpu_topology *saved;
|
||||
int mckernel_cpu_id;
|
||||
cpumask_t core_siblings;
|
||||
cpumask_t thread_siblings;
|
||||
|
||||
struct list_head chain;
|
||||
struct list_head cache_list;
|
||||
};
|
||||
|
||||
#define NODE_DISTANCE_S_SIZE 1024
|
||||
|
||||
struct node_topology {
|
||||
struct ihk_node_topology *saved;
|
||||
int mckernel_numa_id;
|
||||
char mckernel_numa_distance_s[NODE_DISTANCE_S_SIZE];
|
||||
cpumask_t cpumap;
|
||||
|
||||
struct list_head chain;
|
||||
};
|
||||
|
||||
#define CPU_LONGS (((NR_CPUS) + (BITS_PER_LONG) - 1) / (BITS_PER_LONG))
|
||||
|
||||
#define MCCTRL_PER_PROC_DATA_HASH_SHIFT 7
|
||||
#define MCCTRL_PER_PROC_DATA_HASH_SIZE (1 << MCCTRL_PER_PROC_DATA_HASH_SHIFT)
|
||||
#define MCCTRL_PER_PROC_DATA_HASH_MASK (MCCTRL_PER_PROC_DATA_HASH_SIZE - 1)
|
||||
|
||||
struct mcctrl_usrdata {
|
||||
struct ihk_ikc_listen_param listen_param;
|
||||
struct ihk_ikc_listen_param listen_param2;
|
||||
ihk_os_t os;
|
||||
int num_channels;
|
||||
struct mcctrl_channel *channels;
|
||||
int remaining_job;
|
||||
int base_cpu;
|
||||
int job_pos;
|
||||
int mcctrl_dma_abort;
|
||||
unsigned long last_thread_exec;
|
||||
wait_queue_head_t wq_prepare;
|
||||
|
||||
struct list_head per_proc_data_hash[MCCTRL_PER_PROC_DATA_HASH_SIZE];
|
||||
rwlock_t per_proc_data_hash_lock[MCCTRL_PER_PROC_DATA_HASH_SIZE];
|
||||
|
||||
void **keys;
|
||||
struct sysfsm_data sysfsm_data;
|
||||
unsigned long cpu_online[CPU_LONGS];
|
||||
struct ihk_cpu_info *cpu_info;
|
||||
struct ihk_mem_info *mem_info;
|
||||
nodemask_t numa_online;
|
||||
struct list_head cpu_topology_list;
|
||||
struct list_head node_topology_list;
|
||||
};
|
||||
|
||||
struct mcctrl_signal {
|
||||
int cond;
|
||||
int sig;
|
||||
int pid;
|
||||
int tid;
|
||||
char info[128];
|
||||
};
|
||||
|
||||
int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp);
|
||||
int mcctrl_ikc_send_msg(ihk_os_t os, int cpu, int msg, int ref, unsigned long arg);
|
||||
int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu);
|
||||
|
||||
ihk_os_t osnum_to_os(int n);
|
||||
|
||||
/* syscall.c */
|
||||
int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet);
|
||||
int mcctrl_add_per_proc_data(struct mcctrl_usrdata *ud, int pid,
|
||||
struct mcctrl_per_proc_data *ppd);
|
||||
int mcctrl_delete_per_proc_data(struct mcctrl_usrdata *ud, int pid);
|
||||
inline struct mcctrl_per_proc_data *mcctrl_get_per_proc_data(
|
||||
struct mcctrl_usrdata *ud, int pid);
|
||||
|
||||
int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd,
|
||||
struct task_struct *task, void *data);
|
||||
int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd,
|
||||
struct task_struct *task);
|
||||
inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(
|
||||
struct mcctrl_per_proc_data *ppd, struct task_struct *task);
|
||||
|
||||
void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet,
|
||||
long ret, int stid);
|
||||
|
||||
#define PROCFS_NAME_MAX 768
|
||||
|
||||
struct procfs_read {
|
||||
unsigned long pbuf; /* physical address of the host buffer (request) */
|
||||
unsigned long offset; /* offset to read (request) */
|
||||
int count; /* bytes to read (request) */
|
||||
int eof; /* if eof is detected, 1 otherwise 0. (answer)*/
|
||||
int ret; /* read bytes (answer) */
|
||||
int status; /* non-zero if done (answer) */
|
||||
int newcpu; /* migrated new cpu (answer) */
|
||||
int readwrite; /* 0:read, 1:write */
|
||||
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
|
||||
};
|
||||
|
||||
struct procfs_file {
|
||||
int status; /* status of processing (answer) */
|
||||
int mode; /* file mode (request) */
|
||||
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
|
||||
};
|
||||
|
||||
void procfs_answer(unsigned int arg, int err);
|
||||
int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg);
|
||||
void add_tid_entry(int osnum, int pid, int tid);
|
||||
void add_pid_entry(int osnum, int pid);
|
||||
void delete_tid_entry(int osnum, int pid, int tid);
|
||||
void delete_pid_entry(int osnum, int pid);
|
||||
void proc_exe_link(int osnum, int pid, const char *path);
|
||||
void procfs_init(int osnum);
|
||||
void procfs_exit(int osnum);
|
||||
|
||||
/* sysfs_files.c */
|
||||
void setup_sysfs_files(ihk_os_t os);
|
||||
void reply_get_cpu_mapping(long req_pa);
|
||||
void free_topology_info(ihk_os_t os);
|
||||
|
||||
/* archdep.c */
|
||||
#define VDSO_MAXPAGES 2
|
||||
struct vdso {
|
||||
long busy;
|
||||
int vdso_npages;
|
||||
char vvar_is_global;
|
||||
char hpet_is_global;
|
||||
char pvti_is_global;
|
||||
char padding;
|
||||
long vdso_physlist[VDSO_MAXPAGES];
|
||||
void *vvar_virt;
|
||||
long vvar_phys;
|
||||
void *hpet_virt;
|
||||
long hpet_phys;
|
||||
void *pvti_virt;
|
||||
long pvti_phys;
|
||||
};
|
||||
|
||||
int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp,
|
||||
unsigned long *endp);
|
||||
void get_vdso_info(ihk_os_t os, long vdso_pa);
|
||||
|
||||
struct get_cpu_mapping_req {
|
||||
int busy; /* INOUT: */
|
||||
int error; /* OUT: */
|
||||
long buf_rpa; /* OUT: physical address of struct cpu_mapping */
|
||||
int buf_elems; /* OUT: # of elements of buf */
|
||||
int padding;
|
||||
|
||||
/* work for mcctrl */
|
||||
wait_queue_head_t wq;
|
||||
};
|
||||
|
||||
#endif
|
||||
838
executer/kernel/mcctrl/procfs.c
Normal file
838
executer/kernel/mcctrl/procfs.c
Normal file
@ -0,0 +1,838 @@
|
||||
/**
|
||||
* \file procfs.c
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* mcctrl procfs
|
||||
* \author Naoki Hamada <nao@axe.bz> \par
|
||||
* Copyright (C) 2014 AXE, Inc.
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#include <linux/slab.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/resource.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include "mcctrl.h"
|
||||
#include <linux/version.h>
|
||||
#include <linux/semaphore.h>
|
||||
|
||||
//#define PROCFS_DEBUG
|
||||
|
||||
#ifdef PROCFS_DEBUG
|
||||
#define dprintk(...) printk(__VA_ARGS__)
|
||||
#else
|
||||
#define dprintk(...)
|
||||
#endif
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
|
||||
typedef uid_t kuid_t;
|
||||
typedef gid_t kgid_t;
|
||||
#endif
|
||||
|
||||
struct procfs_entry {
|
||||
char *name;
|
||||
mode_t mode;
|
||||
const struct file_operations *fops;
|
||||
};
|
||||
|
||||
#define NOD(NAME, MODE, FOP) { \
|
||||
.name = (NAME), \
|
||||
.mode = MODE, \
|
||||
.fops = FOP, \
|
||||
}
|
||||
#define PROC_DIR(NAME, MODE) \
|
||||
NOD(NAME, (S_IFDIR|(MODE)), NULL)
|
||||
#define PROC_REG(NAME, MODE, fops) \
|
||||
NOD(NAME, (S_IFREG|(MODE)), fops)
|
||||
#define PROC_TERM \
|
||||
NOD(NULL, 0, NULL)
|
||||
|
||||
static const struct procfs_entry tid_entry_stuff[];
|
||||
static const struct procfs_entry pid_entry_stuff[];
|
||||
static const struct procfs_entry base_entry_stuff[];
|
||||
static const struct file_operations mckernel_forward_ro;
|
||||
static const struct file_operations mckernel_forward;
|
||||
|
||||
static DECLARE_WAIT_QUEUE_HEAD(procfsq);
|
||||
static ssize_t mckernel_procfs_read(struct file *file, char __user *buf,
|
||||
size_t nbytes, loff_t *ppos);
|
||||
|
||||
/* A private data for the procfs driver. */
|
||||
struct procfs_list_entry;
|
||||
|
||||
struct procfs_list_entry {
|
||||
struct list_head list;
|
||||
struct proc_dir_entry *entry;
|
||||
struct procfs_list_entry *parent;
|
||||
struct list_head children;
|
||||
int osnum;
|
||||
char *data;
|
||||
char name[0];
|
||||
};
|
||||
|
||||
/*
|
||||
* In the procfs_file_list, mckenrel procfs files are
|
||||
* listed in the manner that the leaf file is located
|
||||
* always nearer to the list top than its parent node
|
||||
* file.
|
||||
*/
|
||||
LIST_HEAD(procfs_file_list);
|
||||
DEFINE_SEMAPHORE(procfs_file_list_lock);
|
||||
|
||||
static char *
|
||||
getpath(struct procfs_list_entry *e, char *buf, int bufsize)
|
||||
{
|
||||
char *w = buf + bufsize - 1;
|
||||
|
||||
*w = '\0';
|
||||
for(;;){
|
||||
int l = strlen(e->name);
|
||||
w -= l;
|
||||
memcpy(w, e->name, l);
|
||||
e = e->parent;
|
||||
if(!e)
|
||||
return w;
|
||||
w--;
|
||||
*w = '/';
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Process SCD_MSG_PROCFS_ANSWER message.
|
||||
*
|
||||
* \param arg sent argument
|
||||
* \param err error info (redundant)
|
||||
*/
|
||||
void
|
||||
procfs_answer(unsigned int arg, int err)
|
||||
{
|
||||
dprintk("procfs: received SCD_MSG_PROCFS_ANSWER message(err = %d).\n", err);
|
||||
wake_up_interruptible(&procfsq);
|
||||
}
|
||||
|
||||
static struct procfs_list_entry *
|
||||
find_procfs_entry(struct procfs_list_entry *parent, const char *name)
|
||||
{
|
||||
struct list_head *list;
|
||||
struct procfs_list_entry *e;
|
||||
|
||||
if(parent == NULL)
|
||||
list = &procfs_file_list;
|
||||
else
|
||||
list = &parent->children;
|
||||
|
||||
list_for_each_entry(e, list, list) {
|
||||
if(!strcmp(e->name, name))
|
||||
return e;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
delete_procfs_entries(struct procfs_list_entry *top)
|
||||
{
|
||||
struct procfs_list_entry *e;
|
||||
struct procfs_list_entry *n;
|
||||
|
||||
list_del(&top->list);
|
||||
|
||||
list_for_each_entry_safe(e, n, &top->children, list) {
|
||||
delete_procfs_entries(e);
|
||||
}
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
e->entry->read_proc = NULL;
|
||||
e->entry->data = NULL;
|
||||
#endif
|
||||
remove_proc_entry(top->name, top->parent? top->parent->entry: NULL);
|
||||
if(top->data)
|
||||
kfree(top->data);
|
||||
kfree(top);
|
||||
}
|
||||
|
||||
static struct procfs_list_entry *
|
||||
add_procfs_entry(struct procfs_list_entry *parent, const char *name, int mode,
|
||||
kuid_t uid, kgid_t gid, const void *opaque)
|
||||
{
|
||||
struct procfs_list_entry *e = find_procfs_entry(parent, name);
|
||||
struct proc_dir_entry *pde;
|
||||
struct proc_dir_entry *parent_pde = NULL;
|
||||
int f_mode = mode & 0777;
|
||||
|
||||
if(e)
|
||||
delete_procfs_entries(e);
|
||||
|
||||
e = kmalloc(sizeof(struct procfs_list_entry) + strlen(name) + 1,
|
||||
GFP_KERNEL);
|
||||
if(!e){
|
||||
kprintf("ERROR: not enough memory to create PROCFS entry.\n");
|
||||
return NULL;
|
||||
}
|
||||
memset(e, '\0', sizeof(struct procfs_list_entry));
|
||||
INIT_LIST_HEAD(&e->children);
|
||||
strcpy(e->name, name);
|
||||
|
||||
if(parent)
|
||||
parent_pde = parent->entry;
|
||||
|
||||
if (mode & S_IFDIR) {
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
pde = proc_mkdir(name, parent_pde);
|
||||
#else
|
||||
pde = proc_mkdir_data(name, f_mode, parent_pde, e);
|
||||
#endif
|
||||
}
|
||||
else if ((mode & S_IFLNK) == S_IFLNK) {
|
||||
pde = proc_symlink(name, parent_pde, (char *)opaque);
|
||||
}
|
||||
else {
|
||||
const struct file_operations *fop;
|
||||
|
||||
if(opaque)
|
||||
fop = (const struct file_operations *)opaque;
|
||||
else if(mode & S_IWUSR)
|
||||
fop = &mckernel_forward;
|
||||
else
|
||||
fop = &mckernel_forward_ro;
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
pde = create_proc_entry(name, f_mode, parent_pde);
|
||||
if(pde)
|
||||
pde->proc_fops = fop;
|
||||
#else
|
||||
pde = proc_create_data(name, f_mode, parent_pde, fop, e);
|
||||
if(pde)
|
||||
proc_set_user(pde, uid, gid);
|
||||
#endif
|
||||
}
|
||||
if(!pde){
|
||||
kprintf("ERROR: cannot create a PROCFS entry for %s.\n", name);
|
||||
kfree(e);
|
||||
return NULL;
|
||||
}
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
pde->uid = uid;
|
||||
pde->gid = gid;
|
||||
pde->data = e;
|
||||
#endif
|
||||
|
||||
if(parent)
|
||||
e->osnum = parent->osnum;
|
||||
e->entry = pde;
|
||||
e->parent = parent;
|
||||
list_add(&(e->list), parent? &(parent->children): &procfs_file_list);
|
||||
|
||||
return e;
|
||||
}
|
||||
|
||||
static void
|
||||
add_procfs_entries(struct procfs_list_entry *parent,
|
||||
const struct procfs_entry *entries, kuid_t uid, kgid_t gid)
|
||||
{
|
||||
const struct procfs_entry *p;
|
||||
|
||||
for(p = entries; p->name; p++){
|
||||
add_procfs_entry(parent, p->name, p->mode, uid, gid, p->fops);
|
||||
}
|
||||
}
|
||||
|
||||
static const struct cred *
|
||||
get_pid_cred(int pid)
|
||||
{
|
||||
struct task_struct *task = NULL;
|
||||
|
||||
if(pid > 0){
|
||||
task = pid_task(find_vpid(pid), PIDTYPE_PID);
|
||||
if(task){
|
||||
return __task_cred(task);
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct procfs_list_entry *
|
||||
find_base_entry(int osnum)
|
||||
{
|
||||
char name[12];
|
||||
|
||||
sprintf(name, "mcos%d", osnum);
|
||||
return find_procfs_entry(NULL, name);
|
||||
}
|
||||
|
||||
static struct procfs_list_entry *
|
||||
find_pid_entry(int osnum, int pid)
|
||||
{
|
||||
struct procfs_list_entry *e;
|
||||
char name[12];
|
||||
|
||||
if(!(e = find_base_entry(osnum)))
|
||||
return NULL;
|
||||
sprintf(name, "%d", pid);
|
||||
return find_procfs_entry(e, name);
|
||||
}
|
||||
|
||||
static struct procfs_list_entry *
|
||||
find_tid_entry(int osnum, int pid, int tid)
|
||||
{
|
||||
struct procfs_list_entry *e;
|
||||
char name[12];
|
||||
|
||||
if(!(e = find_pid_entry(osnum, pid)))
|
||||
return NULL;
|
||||
if(!(e = find_procfs_entry(e, "task")))
|
||||
return NULL;
|
||||
sprintf(name, "%d", tid);
|
||||
return find_procfs_entry(e, name);
|
||||
}
|
||||
|
||||
static struct procfs_list_entry *
|
||||
get_base_entry(int osnum)
|
||||
{
|
||||
struct procfs_list_entry *e;
|
||||
char name[12];
|
||||
kuid_t uid = KUIDT_INIT(0);
|
||||
kgid_t gid = KGIDT_INIT(0);
|
||||
|
||||
sprintf(name, "mcos%d", osnum);
|
||||
e = find_procfs_entry(NULL, name);
|
||||
if(!e){
|
||||
e = add_procfs_entry(NULL, name, S_IFDIR | 0555,
|
||||
uid, gid, NULL);
|
||||
e->osnum = osnum;
|
||||
}
|
||||
return e;
|
||||
}
|
||||
|
||||
static struct procfs_list_entry *
|
||||
get_pid_entry(int osnum, int pid)
|
||||
{
|
||||
struct procfs_list_entry *parent;
|
||||
struct procfs_list_entry *e;
|
||||
char name[12];
|
||||
kuid_t uid = KUIDT_INIT(0);
|
||||
kgid_t gid = KGIDT_INIT(0);
|
||||
|
||||
sprintf(name, "mcos%d", osnum);
|
||||
if(!(parent = find_procfs_entry(NULL, name)))
|
||||
return NULL;
|
||||
sprintf(name, "%d", pid);
|
||||
e = find_procfs_entry(parent, name);
|
||||
if(!e)
|
||||
e = add_procfs_entry(parent, name, S_IFDIR | 0555,
|
||||
uid, gid, NULL);
|
||||
return e;
|
||||
}
|
||||
|
||||
static struct procfs_list_entry *
|
||||
get_tid_entry(int osnum, int pid, int tid)
|
||||
{
|
||||
struct procfs_list_entry *parent;
|
||||
struct procfs_list_entry *e;
|
||||
char name[12];
|
||||
kuid_t uid = KUIDT_INIT(0);
|
||||
kgid_t gid = KGIDT_INIT(0);
|
||||
|
||||
sprintf(name, "mcos%d", osnum);
|
||||
if(!(parent = find_procfs_entry(NULL, name)))
|
||||
return NULL;
|
||||
sprintf(name, "%d", pid);
|
||||
if(!(parent = find_procfs_entry(parent, name)))
|
||||
return NULL;
|
||||
if(!(parent = find_procfs_entry(parent, "task")))
|
||||
return NULL;
|
||||
sprintf(name, "%d", tid);
|
||||
e = find_procfs_entry(parent, name);
|
||||
if(!e)
|
||||
e = add_procfs_entry(parent, name, S_IFDIR | 0555,
|
||||
uid, gid, NULL);
|
||||
return e;
|
||||
}
|
||||
|
||||
static void
|
||||
_add_tid_entry(int osnum, int pid, int tid, const struct cred *cred)
|
||||
{
|
||||
struct procfs_list_entry *parent;
|
||||
struct procfs_list_entry *exe;
|
||||
|
||||
parent = get_tid_entry(osnum, pid, tid);
|
||||
if(parent){
|
||||
add_procfs_entries(parent, tid_entry_stuff,
|
||||
cred->uid, cred->gid);
|
||||
exe = find_procfs_entry(parent->parent->parent, "exe");
|
||||
if(exe){
|
||||
add_procfs_entry(parent, "exe", S_IFLNK | 0777,
|
||||
cred->uid, cred->gid, exe->data);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
add_tid_entry(int osnum, int pid, int tid)
|
||||
{
|
||||
const struct cred *cred = get_pid_cred(pid);
|
||||
|
||||
if(!cred)
|
||||
return;
|
||||
down(&procfs_file_list_lock);
|
||||
_add_tid_entry(osnum, pid, tid, cred);
|
||||
up(&procfs_file_list_lock);
|
||||
}
|
||||
|
||||
void
|
||||
add_pid_entry(int osnum, int pid)
|
||||
{
|
||||
struct procfs_list_entry *parent;
|
||||
const struct cred *cred = get_pid_cred(pid);
|
||||
|
||||
if(!cred)
|
||||
return;
|
||||
down(&procfs_file_list_lock);
|
||||
parent = get_pid_entry(osnum, pid);
|
||||
add_procfs_entries(parent, pid_entry_stuff, cred->uid, cred->gid);
|
||||
_add_tid_entry(osnum, pid, pid, cred);
|
||||
up(&procfs_file_list_lock);
|
||||
}
|
||||
|
||||
void
|
||||
delete_tid_entry(int osnum, int pid, int tid)
|
||||
{
|
||||
struct procfs_list_entry *e;
|
||||
|
||||
down(&procfs_file_list_lock);
|
||||
e = find_tid_entry(osnum, pid, tid);
|
||||
if(e)
|
||||
delete_procfs_entries(e);
|
||||
up(&procfs_file_list_lock);
|
||||
}
|
||||
|
||||
void
|
||||
delete_pid_entry(int osnum, int pid)
|
||||
{
|
||||
struct procfs_list_entry *e;
|
||||
|
||||
down(&procfs_file_list_lock);
|
||||
e = find_pid_entry(osnum, pid);
|
||||
if(e)
|
||||
delete_procfs_entries(e);
|
||||
up(&procfs_file_list_lock);
|
||||
}
|
||||
|
||||
void
|
||||
proc_exe_link(int osnum, int pid, const char *path)
|
||||
{
|
||||
struct procfs_list_entry *parent;
|
||||
kuid_t uid = KUIDT_INIT(0);
|
||||
kgid_t gid = KGIDT_INIT(0);
|
||||
|
||||
down(&procfs_file_list_lock);
|
||||
parent = find_pid_entry(osnum, pid);
|
||||
if(parent){
|
||||
struct procfs_list_entry *task;
|
||||
struct procfs_list_entry *e;
|
||||
|
||||
e = add_procfs_entry(parent, "exe", S_IFLNK | 0777, uid, gid,
|
||||
path);
|
||||
e->data = kmalloc(strlen(path) + 1, GFP_KERNEL);
|
||||
strcpy(e->data, path);
|
||||
task = find_procfs_entry(parent, "task");
|
||||
list_for_each_entry(parent, &task->children, list) {
|
||||
add_procfs_entry(parent, "exe", S_IFLNK | 0777,
|
||||
uid, gid, path);
|
||||
}
|
||||
}
|
||||
up(&procfs_file_list_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Initialization for procfs
|
||||
*
|
||||
* \param osnum os number
|
||||
*/
|
||||
void
|
||||
procfs_init(int osnum)
|
||||
{
|
||||
struct procfs_list_entry *parent;
|
||||
kuid_t uid = KUIDT_INIT(0);
|
||||
kgid_t gid = KGIDT_INIT(0);
|
||||
|
||||
down(&procfs_file_list_lock);
|
||||
parent = get_base_entry(osnum);
|
||||
add_procfs_entries(parent, base_entry_stuff, uid, gid);
|
||||
up(&procfs_file_list_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Finalization for procfs
|
||||
*
|
||||
* \param osnum os number
|
||||
*/
|
||||
void
|
||||
procfs_exit(int osnum)
|
||||
{
|
||||
struct procfs_list_entry *e;
|
||||
|
||||
down(&procfs_file_list_lock);
|
||||
e = find_base_entry(osnum);
|
||||
if (e) {
|
||||
delete_procfs_entries(e);
|
||||
}
|
||||
up(&procfs_file_list_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief The callback funciton for McKernel procfs
|
||||
*
|
||||
* This function conforms to the 2) way of fs/proc/generic.c
|
||||
* from linux-2.6.39.4.
|
||||
*/
|
||||
static ssize_t
|
||||
mckernel_procfs_read(struct file *file, char __user *buf, size_t nbytes,
|
||||
loff_t *ppos)
|
||||
{
|
||||
struct inode * inode = file->f_inode;
|
||||
char *kern_buffer = NULL;
|
||||
int order = 0;
|
||||
volatile struct procfs_read *r = NULL;
|
||||
struct ikc_scd_packet isp;
|
||||
int ret;
|
||||
unsigned long pbuf;
|
||||
unsigned long count = nbytes;
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
struct proc_dir_entry *dp = PDE(inode);
|
||||
struct procfs_list_entry *e = dp->data;
|
||||
#else
|
||||
struct procfs_list_entry *e = PDE_DATA(inode);
|
||||
#endif
|
||||
loff_t offset = *ppos;
|
||||
char pathbuf[PROCFS_NAME_MAX];
|
||||
char *path;
|
||||
|
||||
path = getpath(e, pathbuf, 256);
|
||||
dprintk("mckernel_procfs_read: invoked for %s, offset: %lu, count: %d\n",
|
||||
path, offset, count);
|
||||
|
||||
if (count <= 0 || offset < 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
while ((1 << order) < count) ++order;
|
||||
if (order > 12) {
|
||||
order -= 12;
|
||||
}
|
||||
else {
|
||||
order = 1;
|
||||
}
|
||||
|
||||
/* NOTE: we need physically contigous memory to pass through IKC */
|
||||
kern_buffer = (char *)__get_free_pages(GFP_KERNEL, order);
|
||||
if (!kern_buffer) {
|
||||
printk("mckernel_procfs_read(): ERROR: allocating kernel buffer\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
pbuf = virt_to_phys(kern_buffer);
|
||||
|
||||
r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL);
|
||||
if (r == NULL) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
r->pbuf = pbuf;
|
||||
r->eof = 0;
|
||||
r->ret = -EIO; /* default */
|
||||
r->status = 0;
|
||||
r->offset = offset;
|
||||
r->count = count;
|
||||
r->readwrite = 0;
|
||||
strncpy((char *)r->fname, path, PROCFS_NAME_MAX);
|
||||
isp.msg = SCD_MSG_PROCFS_REQUEST;
|
||||
isp.ref = 0;
|
||||
isp.arg = virt_to_phys(r);
|
||||
|
||||
ret = mcctrl_ikc_send(osnum_to_os(e->osnum), 0, &isp);
|
||||
|
||||
if (ret < 0) {
|
||||
goto out; /* error */
|
||||
}
|
||||
|
||||
/* Wait for a reply. */
|
||||
ret = -EIO; /* default exit code */
|
||||
dprintk("now wait for a relpy\n");
|
||||
|
||||
/* Wait for the status field of the procfs_read structure set ready. */
|
||||
if (wait_event_interruptible_timeout(procfsq, r->status != 0, HZ) == 0) {
|
||||
kprintf("ERROR: mckernel_procfs_read: timeout (1 sec).\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Wake up and check the result. */
|
||||
dprintk("mckernel_procfs_read: woke up. ret: %d, eof: %d\n", r->ret, r->eof);
|
||||
|
||||
if (r->ret > 0) {
|
||||
if (copy_to_user(buf, kern_buffer, r->ret)) {
|
||||
kprintf("ERROR: mckernel_procfs_read: copy_to_user failed.\n");
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
*ppos += r->ret;
|
||||
}
|
||||
ret = r->ret;
|
||||
|
||||
out:
|
||||
if(kern_buffer)
|
||||
free_pages((uintptr_t)kern_buffer, order);
|
||||
if(r)
|
||||
kfree((void *)r);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
mckernel_procfs_write(struct file *file, const char __user *buf, size_t nbytes,
|
||||
loff_t *ppos)
|
||||
{
|
||||
struct inode * inode = file->f_inode;
|
||||
char *kern_buffer = NULL;
|
||||
int order = 0;
|
||||
volatile struct procfs_read *r = NULL;
|
||||
struct ikc_scd_packet isp;
|
||||
int ret;
|
||||
unsigned long pbuf;
|
||||
unsigned long count = nbytes;
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
struct proc_dir_entry *dp = PDE(inode);
|
||||
struct procfs_list_entry *e = dp->data;
|
||||
#else
|
||||
struct procfs_list_entry *e = PDE_DATA(inode);
|
||||
#endif
|
||||
loff_t offset = *ppos;
|
||||
char pathbuf[PROCFS_NAME_MAX];
|
||||
char *path;
|
||||
|
||||
path = getpath(e, pathbuf, 256);
|
||||
dprintk("mckernel_procfs_read: invoked for %s, offset: %lu, count: %d\n",
|
||||
path, offset, count);
|
||||
|
||||
if (count <= 0 || offset < 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
while ((1 << order) < count) ++order;
|
||||
if (order > 12) {
|
||||
order -= 12;
|
||||
}
|
||||
else {
|
||||
order = 1;
|
||||
}
|
||||
|
||||
/* NOTE: we need physically contigous memory to pass through IKC */
|
||||
kern_buffer = (char *)__get_free_pages(GFP_KERNEL, order);
|
||||
if (!kern_buffer) {
|
||||
printk("mckernel_procfs_read(): ERROR: allocating kernel buffer\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (copy_from_user(kern_buffer, buf, nbytes)) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pbuf = virt_to_phys(kern_buffer);
|
||||
|
||||
r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL);
|
||||
if (r == NULL) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
dprintk("offset: %lx, count: %d, cpu: %d\n", offset, count, e->cpu);
|
||||
|
||||
r->pbuf = pbuf;
|
||||
r->eof = 0;
|
||||
r->ret = -EIO; /* default */
|
||||
r->status = 0;
|
||||
r->offset = offset;
|
||||
r->count = count;
|
||||
r->readwrite = 1;
|
||||
strncpy((char *)r->fname, path, PROCFS_NAME_MAX);
|
||||
isp.msg = SCD_MSG_PROCFS_REQUEST;
|
||||
isp.ref = 0;
|
||||
isp.arg = virt_to_phys(r);
|
||||
|
||||
ret = mcctrl_ikc_send(osnum_to_os(e->osnum), 0, &isp);
|
||||
|
||||
if (ret < 0) {
|
||||
goto out; /* error */
|
||||
}
|
||||
|
||||
/* Wait for a reply. */
|
||||
ret = -EIO; /* default exit code */
|
||||
dprintk("now wait for a relpy\n");
|
||||
|
||||
/* Wait for the status field of the procfs_read structure set ready. */
|
||||
if (wait_event_interruptible_timeout(procfsq, r->status != 0, HZ) == 0) {
|
||||
kprintf("ERROR: mckernel_procfs_read: timeout (1 sec).\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Wake up and check the result. */
|
||||
dprintk("mckernel_procfs_read: woke up. ret: %d, eof: %d\n", r->ret, r->eof);
|
||||
|
||||
if (r->ret > 0) {
|
||||
*ppos += r->ret;
|
||||
}
|
||||
ret = r->ret;
|
||||
|
||||
out:
|
||||
if(kern_buffer)
|
||||
free_pages((uintptr_t)kern_buffer, order);
|
||||
if(r)
|
||||
kfree((void *)r);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static loff_t
|
||||
mckernel_procfs_lseek(struct file *file, loff_t offset, int orig)
|
||||
{
|
||||
switch (orig) {
|
||||
case 0:
|
||||
file->f_pos = offset;
|
||||
break;
|
||||
case 1:
|
||||
file->f_pos += offset;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
return file->f_pos;
|
||||
}
|
||||
|
||||
struct procfs_work {
|
||||
void *os;
|
||||
int msg;
|
||||
int pid;
|
||||
unsigned long arg;
|
||||
struct work_struct work;
|
||||
};
|
||||
|
||||
static void procfsm_work_main(struct work_struct *work0)
|
||||
{
|
||||
struct procfs_work *work = container_of(work0, struct procfs_work, work);
|
||||
|
||||
switch (work->msg) {
|
||||
case SCD_MSG_PROCFS_TID_CREATE:
|
||||
add_tid_entry(ihk_host_os_get_index(work->os), work->pid, work->arg);
|
||||
break;
|
||||
|
||||
case SCD_MSG_PROCFS_TID_DELETE:
|
||||
delete_tid_entry(ihk_host_os_get_index(work->os), work->pid, work->arg);
|
||||
break;
|
||||
|
||||
default:
|
||||
printk("%s: unknown work: msg: %d, pid: %d, arg: %lu)\n",
|
||||
__FUNCTION__, work->msg, work->pid, work->arg);
|
||||
break;
|
||||
}
|
||||
|
||||
kfree(work);
|
||||
return;
|
||||
}
|
||||
|
||||
int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg)
|
||||
{
|
||||
struct procfs_work *work = NULL;
|
||||
|
||||
work = kzalloc(sizeof(*work), GFP_ATOMIC);
|
||||
if (!work) {
|
||||
printk("%s: kzalloc failed\n", __FUNCTION__);
|
||||
return -1;
|
||||
}
|
||||
|
||||
work->os = os;
|
||||
work->msg = msg;
|
||||
work->pid = pid;
|
||||
work->arg = arg;
|
||||
INIT_WORK(&work->work, &procfsm_work_main);
|
||||
|
||||
schedule_work(&work->work);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations mckernel_forward_ro = {
|
||||
.llseek = mckernel_procfs_lseek,
|
||||
.read = mckernel_procfs_read,
|
||||
.write = NULL,
|
||||
};
|
||||
|
||||
static const struct file_operations mckernel_forward = {
|
||||
.llseek = mckernel_procfs_lseek,
|
||||
.read = mckernel_procfs_read,
|
||||
.write = mckernel_procfs_write,
|
||||
};
|
||||
|
||||
static const struct procfs_entry tid_entry_stuff[] = {
|
||||
// PROC_REG("auxv", S_IRUSR, NULL),
|
||||
// PROC_REG("clear_refs", S_IWUSR, NULL),
|
||||
// PROC_REG("cmdline", S_IRUGO, NULL),
|
||||
// PROC_REG("comm", S_IRUGO|S_IWUSR, NULL),
|
||||
// PROC_REG("environ", S_IRUSR, NULL),
|
||||
// PROC_LNK("exe", mckernel_readlink),
|
||||
// PROC_REG("limits", S_IRUSR|S_IWUSR, NULL),
|
||||
// PROC_REG("maps", S_IRUGO, NULL),
|
||||
PROC_REG("mem", S_IRUSR|S_IWUSR, NULL),
|
||||
// PROC_REG("pagemap", S_IRUGO, NULL),
|
||||
// PROC_REG("smaps", S_IRUGO, NULL),
|
||||
PROC_REG("stat", S_IRUGO, NULL),
|
||||
// PROC_REG("statm", S_IRUGO, NULL),
|
||||
// PROC_REG("status", S_IRUGO, NULL),
|
||||
// PROC_REG("syscall", S_IRUGO, NULL),
|
||||
// PROC_REG("wchan", S_IRUGO, NULL),
|
||||
PROC_TERM
|
||||
};
|
||||
|
||||
static const struct procfs_entry pid_entry_stuff[] = {
|
||||
PROC_REG("auxv", S_IRUSR, NULL),
|
||||
PROC_REG("cgroup", S_IXUSR, NULL),
|
||||
// PROC_REG("clear_refs", S_IWUSR, NULL),
|
||||
PROC_REG("cmdline", S_IRUGO, NULL),
|
||||
// PROC_REG("comm", S_IRUGO|S_IWUSR, NULL),
|
||||
// PROC_REG("coredump_filter", S_IRUGO|S_IWUSR, NULL),
|
||||
PROC_REG("cpuset", S_IXUSR, NULL),
|
||||
// PROC_REG("environ", S_IRUSR, NULL),
|
||||
// PROC_LNK("exe", mckernel_readlink),
|
||||
// PROC_REG("limits", S_IRUSR|S_IWUSR, NULL),
|
||||
PROC_REG("maps", S_IRUGO, NULL),
|
||||
PROC_REG("mem", S_IRUSR|S_IWUSR, NULL),
|
||||
PROC_REG("pagemap", S_IRUGO, NULL),
|
||||
PROC_REG("smaps", S_IRUGO, NULL),
|
||||
// PROC_REG("stat", S_IRUGO, NULL),
|
||||
// PROC_REG("statm", S_IRUGO, NULL),
|
||||
PROC_REG("status", S_IRUGO, NULL),
|
||||
// PROC_REG("syscall", S_IRUGO, NULL),
|
||||
PROC_DIR("task", S_IRUGO|S_IXUGO),
|
||||
// PROC_REG("wchan", S_IRUGO, NULL),
|
||||
PROC_TERM
|
||||
};
|
||||
|
||||
static const struct procfs_entry base_entry_stuff[] = {
|
||||
// PROC_REG("cmdline", S_IRUGO, NULL),
|
||||
// PROC_REG("cpuinfo", S_IRUGO, NULL),
|
||||
// PROC_REG("meminfo", S_IRUGO, NULL),
|
||||
// PROC_REG("pagetypeinfo",S_IRUGO, NULL),
|
||||
// PROC_REG("softirq", S_IRUGO, NULL),
|
||||
PROC_REG("stat", S_IRUGO, NULL),
|
||||
// PROC_REG("uptime", S_IRUGO, NULL),
|
||||
// PROC_REG("version", S_IRUGO, NULL),
|
||||
// PROC_REG("vmallocinfo",S_IRUSR, NULL),
|
||||
// PROC_REG("vmstat", S_IRUGO, NULL),
|
||||
// PROC_REG("zoneinfo", S_IRUGO, NULL),
|
||||
PROC_TERM
|
||||
};
|
||||
File diff suppressed because it is too large
Load Diff
2502
executer/kernel/mcctrl/sysfs.c
Normal file
2502
executer/kernel/mcctrl/sysfs.c
Normal file
File diff suppressed because it is too large
Load Diff
73
executer/kernel/mcctrl/sysfs.h
Normal file
73
executer/kernel/mcctrl/sysfs.h
Normal file
@ -0,0 +1,73 @@
|
||||
/**
|
||||
* \file sysfs.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* sysfs framework API definitions
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2016 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#ifndef MCCTRL_SYSFS_H
|
||||
#define MCCTRL_SYSFS_H
|
||||
|
||||
#define SYSFS_PATH_MAX 1024
|
||||
|
||||
/* for sysfs_unlinkf() */
|
||||
#define SYSFS_UNLINK_KEEP_ANCESTOR 0x01
|
||||
|
||||
|
||||
struct sysfsm_ops {
|
||||
ssize_t (*show)(struct sysfsm_ops *ops, void *instance, void *buf,
|
||||
size_t bufsize);
|
||||
ssize_t (*store)(struct sysfsm_ops *ops, void *instance,
|
||||
const void *buf, size_t bufsize);
|
||||
void (*release)(struct sysfsm_ops *ops, void *instance);
|
||||
};
|
||||
|
||||
struct sysfs_handle {
|
||||
long handle;
|
||||
};
|
||||
typedef struct sysfs_handle sysfs_handle_t;
|
||||
|
||||
struct sysfsm_bitmap_param {
|
||||
int nbits;
|
||||
int padding;
|
||||
void *ptr;
|
||||
};
|
||||
|
||||
#define SYSFS_SPECIAL_OPS_MIN ((void *)1)
|
||||
#define SYSFS_SPECIAL_OPS_MAX ((void *)1000)
|
||||
|
||||
#define SYSFS_SNOOPING_OPS_d32 ((void *)1)
|
||||
#define SYSFS_SNOOPING_OPS_d64 ((void *)2)
|
||||
#define SYSFS_SNOOPING_OPS_u32 ((void *)3)
|
||||
#define SYSFS_SNOOPING_OPS_u64 ((void *)4)
|
||||
#define SYSFS_SNOOPING_OPS_s ((void *)5)
|
||||
#define SYSFS_SNOOPING_OPS_pbl ((void *)6)
|
||||
#define SYSFS_SNOOPING_OPS_pb ((void *)7)
|
||||
#define SYSFS_SNOOPING_OPS_u32K ((void *)8)
|
||||
|
||||
static inline int is_special_sysfs_ops(void *ops)
|
||||
{
|
||||
return (((long)SYSFS_SPECIAL_OPS_MIN <= (long)ops)
|
||||
&& ((long)ops <= (long)SYSFS_SPECIAL_OPS_MAX));
|
||||
}
|
||||
|
||||
extern int sysfsm_createf(ihk_os_t os, struct sysfsm_ops *ops, void *instance,
|
||||
int mode, const char *fmt, ...);
|
||||
extern int sysfsm_mkdirf(ihk_os_t os, sysfs_handle_t *dirhp,
|
||||
const char *fmt, ...);
|
||||
extern int sysfsm_symlinkf(ihk_os_t os, sysfs_handle_t targeth,
|
||||
const char *fmt, ...);
|
||||
extern int sysfsm_lookupf(ihk_os_t os, sysfs_handle_t *objhp,
|
||||
const char *fmt, ...);
|
||||
extern int sysfsm_unlinkf(ihk_os_t os, int flags, const char *fmt, ...);
|
||||
|
||||
extern void sysfsm_cleanup(ihk_os_t os);
|
||||
extern void sysfsm_packet_handler(void *os, int msg, int err, long arg1,
|
||||
long arg2);
|
||||
|
||||
#endif /* MCCTRL_SYSFS_H */
|
||||
1100
executer/kernel/mcctrl/sysfs_files.c
Normal file
1100
executer/kernel/mcctrl/sysfs_files.c
Normal file
File diff suppressed because it is too large
Load Diff
88
executer/kernel/mcctrl/sysfs_msg.h
Normal file
88
executer/kernel/mcctrl/sysfs_msg.h
Normal file
@ -0,0 +1,88 @@
|
||||
/**
|
||||
* \file sysfs_msg.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* message declarations for sysfs framework
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#ifndef MCKERNEL_SYSFS_MSG_H
|
||||
#define MCKERNEL_SYSFS_MSG_H
|
||||
|
||||
#define SYSFS_PATH_MAX 1024
|
||||
|
||||
struct sysfs_req_create_param {
|
||||
int mode;
|
||||
int error;
|
||||
long client_ops;
|
||||
long client_instance;
|
||||
char path[SYSFS_PATH_MAX];
|
||||
int padding;
|
||||
int busy;
|
||||
}; /* struct sysfs_req_create_param */
|
||||
|
||||
#define SYSFS_SPECIAL_OPS_MIN ((void *)1)
|
||||
#define SYSFS_SPECIAL_OPS_MAX ((void *)1000)
|
||||
|
||||
#define SYSFS_SNOOPING_OPS_d32 ((void *)1)
|
||||
#define SYSFS_SNOOPING_OPS_d64 ((void *)2)
|
||||
#define SYSFS_SNOOPING_OPS_u32 ((void *)3)
|
||||
#define SYSFS_SNOOPING_OPS_u64 ((void *)4)
|
||||
#define SYSFS_SNOOPING_OPS_s ((void *)5)
|
||||
#define SYSFS_SNOOPING_OPS_pbl ((void *)6)
|
||||
#define SYSFS_SNOOPING_OPS_pb ((void *)7)
|
||||
#define SYSFS_SNOOPING_OPS_u32K ((void *)8)
|
||||
|
||||
struct sysfs_req_mkdir_param {
|
||||
int error;
|
||||
int padding;
|
||||
long handle;
|
||||
char path[SYSFS_PATH_MAX];
|
||||
int padding2;
|
||||
int busy;
|
||||
}; /* struct sysfs_req_mkdir_param */
|
||||
|
||||
struct sysfs_req_symlink_param {
|
||||
int error;
|
||||
int padding;
|
||||
long target;
|
||||
char path[SYSFS_PATH_MAX];
|
||||
int padding2;
|
||||
int busy;
|
||||
}; /* struct sysfs_req_symlink_param */
|
||||
|
||||
struct sysfs_req_lookup_param {
|
||||
int error;
|
||||
int padding;
|
||||
long handle;
|
||||
char path[SYSFS_PATH_MAX];
|
||||
int padding2;
|
||||
int busy;
|
||||
}; /* struct sysfs_req_lookup_param */
|
||||
|
||||
/* for sysfs_req_unlink_param.flags */
|
||||
#define SYSFS_UNLINK_KEEP_ANCESTOR 0x01
|
||||
|
||||
struct sysfs_req_unlink_param {
|
||||
int flags;
|
||||
int error;
|
||||
char path[SYSFS_PATH_MAX];
|
||||
int padding;
|
||||
int busy;
|
||||
}; /* struct sysfs_req_unlink_param */
|
||||
|
||||
struct sysfs_req_setup_param {
|
||||
int error;
|
||||
int padding;
|
||||
long buf_rpa;
|
||||
long bufsize;
|
||||
char padding3[SYSFS_PATH_MAX];
|
||||
int padding2;
|
||||
int busy;
|
||||
}; /* struct sysfs_req_setup_param */
|
||||
|
||||
#endif /* MCKERNEL_SYSFS_MSG_H */
|
||||
44
executer/kernel/mcoverlayfs/Makefile.in
Normal file
44
executer/kernel/mcoverlayfs/Makefile.in
Normal file
@ -0,0 +1,44 @@
|
||||
ENABLE_MCOVERLAYFS=@ENABLE_MCOVERLAYFS@
|
||||
|
||||
RELEASE=$(shell uname -r)
|
||||
MAJOR=$(shell echo ${RELEASE} | sed -e 's/^\([0-9]*\).*/\1/')
|
||||
MINOR=$(shell echo ${RELEASE} | sed -e 's/^[0-9]*.\([0-9]*\).*/\1/')
|
||||
PATCH=$(shell echo ${RELEASE} | sed -e 's/^[0-9]*.[0-9]*.\([0-9]*\).*/\1/')
|
||||
LINUX_VERSION_CODE=$(shell expr \( ${MAJOR} \* 65536 \) + \( ${MINOR} \* 256 \) + ${PATCH})
|
||||
RHEL_RELEASE_TMP=$(shell echo ${RELEASE} | sed -e 's/^[0-9]*.[0-9]*.[0-9]*-\([0-9]*\).*/\1/')
|
||||
RHEL_RELEASE=$(shell if [ "${RELEASE}" == "${RHEL_RELEASE_TMP}" ]; then echo ""; else echo ${RHEL_RELEASE_TMP}; fi)
|
||||
BUILD_MODULE_TMP=$(shell if [ "${RHEL_RELEASE}" == "" ]; then echo "org"; else echo "rhel"; fi)
|
||||
BUILD_MODULE=none
|
||||
ifeq ($(ENABLE_MCOVERLAYFS),yes)
|
||||
ifeq ($(BUILD_MODULE_TMP),org)
|
||||
ifeq ($(BUILD_MODULE),none)
|
||||
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -ge 262144 -a ${LINUX_VERSION_CODE} -lt 262400 ]; then echo "linux-4.0.9"; else echo "none"; fi)
|
||||
endif
|
||||
ifeq ($(BUILD_MODULE),none)
|
||||
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -ge 243680 -a ${LINUX_VERSION_CODE} -lt 263936 ]; then echo "linux-4.6.7"; else echo "none"; fi)
|
||||
endif
|
||||
endif
|
||||
ifeq ($(BUILD_MODULE_TMP),rhel)
|
||||
ifeq ($(BUILD_MODULE),none)
|
||||
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -eq 199168 -a ${RHEL_RELEASE} -eq 327 ]; then echo "linux-3.10.0-327.36.1.el7"; else echo "none"; fi)
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
.PHONY: clean install modules
|
||||
|
||||
modules:
|
||||
ifneq ($(BUILD_MODULE),none)
|
||||
@(cd $(BUILD_MODULE); make modules)
|
||||
endif
|
||||
|
||||
clean:
|
||||
@(cd linux-3.10.0-327.36.1.el7; make clean)
|
||||
@(cd linux-4.0.9; make clean)
|
||||
@(cd linux-4.6.7; make clean)
|
||||
|
||||
install:
|
||||
ifneq ($(BUILD_MODULE),none)
|
||||
@(cd $(BUILD_MODULE); make install)
|
||||
endif
|
||||
|
||||
@ -0,0 +1,21 @@
|
||||
KDIR ?= @KDIR@
|
||||
ARCH ?= @ARCH@
|
||||
KMODDIR = @KMODDIR@
|
||||
src = @abs_srcdir@
|
||||
|
||||
obj-m += mcoverlay.o
|
||||
|
||||
mcoverlay-y := copy_up.o dir.o inode.o readdir.o super.o
|
||||
|
||||
.PHONY: clean install modules
|
||||
|
||||
modules:
|
||||
$(MAKE) -C $(KDIR) M=$(PWD) SUBDIRS=$(PWD) ARCH=$(ARCH) modules
|
||||
|
||||
clean:
|
||||
$(RM) .*.cmd *.mod.c *.o *.ko* Module.symvers modules.order -r .tmp*
|
||||
|
||||
install:
|
||||
mkdir -p -m 755 $(KMODDIR)
|
||||
install -m 644 mcoverlay.ko $(KMODDIR)
|
||||
|
||||
461
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/copy_up.c
Normal file
461
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/copy_up.c
Normal file
@ -0,0 +1,461 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/splice.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/fdtable.h>
|
||||
#include <linux/ratelimit.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
|
||||
|
||||
static unsigned ovl_check_copy_up = 1;
|
||||
module_param_named(check_copy_up, ovl_check_copy_up, uint,
|
||||
S_IWUSR | S_IRUGO);
|
||||
MODULE_PARM_DESC(ovl_check_copy_up,
|
||||
"Warn on copy-up when causing process also has a R/O fd open");
|
||||
|
||||
static int ovl_check_fd(const void *data, struct file *f, unsigned fd)
|
||||
{
|
||||
const struct dentry *dentry = data;
|
||||
|
||||
if (f->f_path.dentry == dentry)
|
||||
pr_warn_ratelimited("overlayfs: Warning: Copying up %pD, but open R/O on fd %u which will cease to be coherent [pid=%d %s]\n",
|
||||
f, fd, current->pid, current->comm);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check the fds open by this process and warn if something like the following
|
||||
* scenario is about to occur:
|
||||
*
|
||||
* fd1 = open("foo", O_RDONLY);
|
||||
* fd2 = open("foo", O_RDWR);
|
||||
*/
|
||||
static void ovl_do_check_copy_up(struct dentry *dentry)
|
||||
{
|
||||
if (ovl_check_copy_up)
|
||||
iterate_fd(current->files, 0, ovl_check_fd, dentry);
|
||||
}
|
||||
|
||||
int ovl_copy_xattr(struct dentry *old, struct dentry *new)
|
||||
{
|
||||
ssize_t list_size, size, value_size = 0;
|
||||
char *buf, *name, *value = NULL;
|
||||
int uninitialized_var(error);
|
||||
|
||||
if (!old->d_inode->i_op->getxattr ||
|
||||
!new->d_inode->i_op->getxattr)
|
||||
return 0;
|
||||
|
||||
list_size = vfs_listxattr(old, NULL, 0);
|
||||
if (list_size <= 0) {
|
||||
if (list_size == -EOPNOTSUPP)
|
||||
return 0;
|
||||
return list_size;
|
||||
}
|
||||
|
||||
buf = kzalloc(list_size, GFP_KERNEL);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
|
||||
list_size = vfs_listxattr(old, buf, list_size);
|
||||
if (list_size <= 0) {
|
||||
error = list_size;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
|
||||
retry:
|
||||
size = vfs_getxattr(old, name, value, value_size);
|
||||
if (size == -ERANGE)
|
||||
size = vfs_getxattr(old, name, NULL, 0);
|
||||
|
||||
if (size < 0) {
|
||||
error = size;
|
||||
break;
|
||||
}
|
||||
|
||||
if (size > value_size) {
|
||||
void *new;
|
||||
|
||||
new = krealloc(value, size, GFP_KERNEL);
|
||||
if (!new) {
|
||||
error = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
value = new;
|
||||
value_size = size;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
error = vfs_setxattr(new, name, value, size, 0);
|
||||
if (error)
|
||||
break;
|
||||
}
|
||||
kfree(value);
|
||||
out:
|
||||
kfree(buf);
|
||||
return error;
|
||||
}
|
||||
|
||||
static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
|
||||
{
|
||||
struct file *old_file;
|
||||
struct file *new_file;
|
||||
loff_t old_pos = 0;
|
||||
loff_t new_pos = 0;
|
||||
int error = 0;
|
||||
|
||||
if (len == 0)
|
||||
return 0;
|
||||
|
||||
old_file = ovl_path_open(old, O_RDONLY);
|
||||
if (IS_ERR(old_file))
|
||||
return PTR_ERR(old_file);
|
||||
|
||||
new_file = ovl_path_open(new, O_WRONLY);
|
||||
if (IS_ERR(new_file)) {
|
||||
error = PTR_ERR(new_file);
|
||||
goto out_fput;
|
||||
}
|
||||
|
||||
/* FIXME: copy up sparse files efficiently */
|
||||
while (len) {
|
||||
size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
|
||||
long bytes;
|
||||
|
||||
if (len < this_len)
|
||||
this_len = len;
|
||||
|
||||
if (signal_pending_state(TASK_KILLABLE, current)) {
|
||||
error = -EINTR;
|
||||
break;
|
||||
}
|
||||
|
||||
bytes = do_splice_direct(old_file, &old_pos,
|
||||
new_file, &new_pos,
|
||||
this_len, SPLICE_F_MOVE);
|
||||
if (bytes <= 0) {
|
||||
error = bytes;
|
||||
break;
|
||||
}
|
||||
WARN_ON(old_pos != new_pos);
|
||||
|
||||
len -= bytes;
|
||||
}
|
||||
|
||||
fput(new_file);
|
||||
out_fput:
|
||||
fput(old_file);
|
||||
return error;
|
||||
}
|
||||
|
||||
static char *ovl_read_symlink(struct dentry *realdentry)
|
||||
{
|
||||
int res;
|
||||
char *buf;
|
||||
struct inode *inode = realdentry->d_inode;
|
||||
mm_segment_t old_fs;
|
||||
|
||||
res = -EINVAL;
|
||||
if (!inode->i_op->readlink)
|
||||
goto err;
|
||||
|
||||
res = -ENOMEM;
|
||||
buf = (char *) __get_free_page(GFP_KERNEL);
|
||||
if (!buf)
|
||||
goto err;
|
||||
|
||||
old_fs = get_fs();
|
||||
set_fs(get_ds());
|
||||
/* The cast to a user pointer is valid due to the set_fs() */
|
||||
res = inode->i_op->readlink(realdentry,
|
||||
(char __user *)buf, PAGE_SIZE - 1);
|
||||
set_fs(old_fs);
|
||||
if (res < 0) {
|
||||
free_page((unsigned long) buf);
|
||||
goto err;
|
||||
}
|
||||
buf[res] = '\0';
|
||||
|
||||
return buf;
|
||||
|
||||
err:
|
||||
return ERR_PTR(res);
|
||||
}
|
||||
|
||||
static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
|
||||
{
|
||||
struct iattr attr = {
|
||||
.ia_valid =
|
||||
ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
|
||||
.ia_atime = stat->atime,
|
||||
.ia_mtime = stat->mtime,
|
||||
};
|
||||
|
||||
return notify_change(upperdentry, &attr, NULL);
|
||||
}
|
||||
|
||||
int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (!S_ISLNK(stat->mode)) {
|
||||
struct iattr attr = {
|
||||
.ia_valid = ATTR_MODE,
|
||||
.ia_mode = stat->mode,
|
||||
};
|
||||
err = notify_change(upperdentry, &attr, NULL);
|
||||
}
|
||||
if (!err) {
|
||||
struct iattr attr = {
|
||||
.ia_valid = ATTR_UID | ATTR_GID,
|
||||
.ia_uid = stat->uid,
|
||||
.ia_gid = stat->gid,
|
||||
};
|
||||
err = notify_change(upperdentry, &attr, NULL);
|
||||
}
|
||||
if (!err)
|
||||
ovl_set_timestamps(upperdentry, stat);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
|
||||
struct dentry *dentry, struct path *lowerpath,
|
||||
struct kstat *stat, struct iattr *attr,
|
||||
const char *link)
|
||||
{
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct dentry *newdentry = NULL;
|
||||
struct dentry *upper = NULL;
|
||||
umode_t mode = stat->mode;
|
||||
int err;
|
||||
|
||||
newdentry = ovl_lookup_temp(workdir, dentry);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out;
|
||||
|
||||
upper = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(upper);
|
||||
if (IS_ERR(upper))
|
||||
goto out1;
|
||||
|
||||
/* Can't properly set mode on creation because of the umask */
|
||||
stat->mode &= S_IFMT;
|
||||
err = ovl_create_real(wdir, newdentry, stat, link, NULL, true);
|
||||
stat->mode = mode;
|
||||
if (err)
|
||||
goto out2;
|
||||
|
||||
if (S_ISREG(stat->mode)) {
|
||||
struct path upperpath;
|
||||
ovl_path_upper(dentry, &upperpath);
|
||||
BUG_ON(upperpath.dentry != NULL);
|
||||
upperpath.dentry = newdentry;
|
||||
|
||||
err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
}
|
||||
|
||||
err = ovl_copy_xattr(lowerpath->dentry, newdentry);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
mutex_lock(&newdentry->d_inode->i_mutex);
|
||||
err = ovl_set_attr(newdentry, stat);
|
||||
if (!err && attr)
|
||||
err = notify_change(newdentry, attr, NULL);
|
||||
mutex_unlock(&newdentry->d_inode->i_mutex);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
ovl_dentry_update(dentry, newdentry);
|
||||
newdentry = NULL;
|
||||
|
||||
/*
|
||||
* Non-directores become opaque when copied up.
|
||||
*/
|
||||
if (!S_ISDIR(stat->mode))
|
||||
ovl_dentry_set_opaque(dentry, true);
|
||||
out2:
|
||||
dput(upper);
|
||||
out1:
|
||||
dput(newdentry);
|
||||
out:
|
||||
return err;
|
||||
|
||||
out_cleanup:
|
||||
ovl_cleanup(wdir, newdentry);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy up a single dentry
|
||||
*
|
||||
* Directory renames only allowed on "pure upper" (already created on
|
||||
* upper filesystem, never copied up). Directories which are on lower or
|
||||
* are merged may not be renamed. For these -EXDEV is returned and
|
||||
* userspace has to deal with it. This means, when copying up a
|
||||
* directory we can rely on it and ancestors being stable.
|
||||
*
|
||||
* Non-directory renames start with copy up of source if necessary. The
|
||||
* actual rename will only proceed once the copy up was successful. Copy
|
||||
* up uses upper parent i_mutex for exclusion. Since rename can change
|
||||
* d_parent it is possible that the copy up will lock the old parent. At
|
||||
* that point the file will have already been copied up anyway.
|
||||
*/
|
||||
int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
|
||||
struct path *lowerpath, struct kstat *stat,
|
||||
struct iattr *attr)
|
||||
{
|
||||
struct dentry *workdir = ovl_workdir(dentry);
|
||||
int err;
|
||||
struct kstat pstat;
|
||||
struct path parentpath;
|
||||
struct dentry *upperdir;
|
||||
struct dentry *upperdentry;
|
||||
const struct cred *old_cred;
|
||||
struct cred *override_cred;
|
||||
char *link = NULL;
|
||||
|
||||
if (WARN_ON(!workdir))
|
||||
return -EROFS;
|
||||
|
||||
ovl_do_check_copy_up(lowerpath->dentry);
|
||||
|
||||
ovl_path_upper(parent, &parentpath);
|
||||
upperdir = parentpath.dentry;
|
||||
|
||||
err = vfs_getattr(&parentpath, &pstat);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (S_ISLNK(stat->mode)) {
|
||||
link = ovl_read_symlink(lowerpath->dentry);
|
||||
if (IS_ERR(link))
|
||||
return PTR_ERR(link);
|
||||
}
|
||||
|
||||
err = -ENOMEM;
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
goto out_free_link;
|
||||
|
||||
override_cred->fsuid = stat->uid;
|
||||
override_cred->fsgid = stat->gid;
|
||||
/*
|
||||
* CAP_SYS_ADMIN for copying up extended attributes
|
||||
* CAP_DAC_OVERRIDE for create
|
||||
* CAP_FOWNER for chmod, timestamp update
|
||||
* CAP_FSETID for chmod
|
||||
* CAP_CHOWN for chown
|
||||
* CAP_MKNOD for mknod
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
cap_raise(override_cred->cap_effective, CAP_FOWNER);
|
||||
cap_raise(override_cred->cap_effective, CAP_FSETID);
|
||||
cap_raise(override_cred->cap_effective, CAP_CHOWN);
|
||||
cap_raise(override_cred->cap_effective, CAP_MKNOD);
|
||||
old_cred = override_creds(override_cred);
|
||||
|
||||
err = -EIO;
|
||||
if (lock_rename(workdir, upperdir) != NULL) {
|
||||
pr_err("overlayfs: failed to lock workdir+upperdir\n");
|
||||
goto out_unlock;
|
||||
}
|
||||
upperdentry = ovl_dentry_upper(dentry);
|
||||
if (upperdentry) {
|
||||
unlock_rename(workdir, upperdir);
|
||||
err = 0;
|
||||
/* Raced with another copy-up? Do the setattr here */
|
||||
if (attr) {
|
||||
mutex_lock(&upperdentry->d_inode->i_mutex);
|
||||
err = notify_change(upperdentry, attr, NULL);
|
||||
mutex_unlock(&upperdentry->d_inode->i_mutex);
|
||||
}
|
||||
goto out_put_cred;
|
||||
}
|
||||
|
||||
err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
|
||||
stat, attr, link);
|
||||
if (!err) {
|
||||
/* Restore timestamps on parent (best effort) */
|
||||
ovl_set_timestamps(upperdir, &pstat);
|
||||
}
|
||||
out_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
out_put_cred:
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
|
||||
out_free_link:
|
||||
if (link)
|
||||
free_page((unsigned long) link);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int ovl_copy_up(struct dentry *dentry)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = 0;
|
||||
while (!err) {
|
||||
struct dentry *next;
|
||||
struct dentry *parent;
|
||||
struct path lowerpath;
|
||||
struct kstat stat;
|
||||
enum ovl_path_type type = ovl_path_type(dentry);
|
||||
|
||||
if (OVL_TYPE_UPPER(type))
|
||||
break;
|
||||
|
||||
next = dget(dentry);
|
||||
/* find the topmost dentry not yet copied up */
|
||||
for (;;) {
|
||||
parent = dget_parent(next);
|
||||
|
||||
type = ovl_path_type(parent);
|
||||
if (OVL_TYPE_UPPER(type))
|
||||
break;
|
||||
|
||||
dput(next);
|
||||
next = parent;
|
||||
}
|
||||
|
||||
ovl_path_lower(next, &lowerpath);
|
||||
err = vfs_getattr(&lowerpath, &stat);
|
||||
if (!err)
|
||||
err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL);
|
||||
|
||||
dput(parent);
|
||||
dput(next);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
972
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/dir.c
Normal file
972
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/dir.c
Normal file
@ -0,0 +1,972 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/cred.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
|
||||
{
|
||||
int err;
|
||||
|
||||
dget(wdentry);
|
||||
if (S_ISDIR(wdentry->d_inode->i_mode))
|
||||
err = ovl_do_rmdir(wdir, wdentry);
|
||||
else
|
||||
err = ovl_do_unlink(wdir, wdentry);
|
||||
dput(wdentry);
|
||||
|
||||
if (err) {
|
||||
pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
|
||||
wdentry, err);
|
||||
}
|
||||
}
|
||||
|
||||
struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
|
||||
{
|
||||
struct dentry *temp;
|
||||
char name[20];
|
||||
|
||||
snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry);
|
||||
|
||||
temp = lookup_one_len(name, workdir, strlen(name));
|
||||
if (!IS_ERR(temp) && temp->d_inode) {
|
||||
pr_err("overlayfs: workdir/%s already exists\n", name);
|
||||
dput(temp);
|
||||
temp = ERR_PTR(-EIO);
|
||||
}
|
||||
|
||||
return temp;
|
||||
}
|
||||
|
||||
/* caller holds i_mutex on workdir */
|
||||
static struct dentry *ovl_whiteout(struct dentry *workdir,
|
||||
struct dentry *dentry)
|
||||
{
|
||||
int err;
|
||||
struct dentry *whiteout;
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
|
||||
whiteout = ovl_lookup_temp(workdir, dentry);
|
||||
if (IS_ERR(whiteout))
|
||||
return whiteout;
|
||||
|
||||
err = ovl_do_whiteout(wdir, whiteout);
|
||||
if (err) {
|
||||
dput(whiteout);
|
||||
whiteout = ERR_PTR(err);
|
||||
}
|
||||
|
||||
return whiteout;
|
||||
}
|
||||
|
||||
int ovl_create_real(struct inode *dir, struct dentry *newdentry,
|
||||
struct kstat *stat, const char *link,
|
||||
struct dentry *hardlink, bool debug)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (newdentry->d_inode)
|
||||
return -ESTALE;
|
||||
|
||||
if (hardlink) {
|
||||
err = ovl_do_link(hardlink, dir, newdentry, debug);
|
||||
} else {
|
||||
switch (stat->mode & S_IFMT) {
|
||||
case S_IFREG:
|
||||
err = ovl_do_create(dir, newdentry, stat->mode, debug);
|
||||
break;
|
||||
|
||||
case S_IFDIR:
|
||||
err = ovl_do_mkdir(dir, newdentry, stat->mode, debug);
|
||||
break;
|
||||
|
||||
case S_IFCHR:
|
||||
case S_IFBLK:
|
||||
case S_IFIFO:
|
||||
case S_IFSOCK:
|
||||
err = ovl_do_mknod(dir, newdentry,
|
||||
stat->mode, stat->rdev, debug);
|
||||
break;
|
||||
|
||||
case S_IFLNK:
|
||||
err = ovl_do_symlink(dir, newdentry, link, debug);
|
||||
break;
|
||||
|
||||
default:
|
||||
err = -EPERM;
|
||||
}
|
||||
}
|
||||
if (!err && WARN_ON(!newdentry->d_inode)) {
|
||||
/*
|
||||
* Not quite sure if non-instantiated dentry is legal or not.
|
||||
* VFS doesn't seem to care so check and warn here.
|
||||
*/
|
||||
err = -ENOENT;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_set_opaque(struct dentry *upperdentry)
|
||||
{
|
||||
return ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0);
|
||||
}
|
||||
|
||||
static void ovl_remove_opaque(struct dentry *upperdentry)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = ovl_do_removexattr(upperdentry, OVL_XATTR_OPAQUE);
|
||||
if (err) {
|
||||
pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n",
|
||||
upperdentry->d_name.name, err);
|
||||
}
|
||||
}
|
||||
|
||||
static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat)
|
||||
{
|
||||
int err;
|
||||
enum ovl_path_type type;
|
||||
struct path realpath;
|
||||
|
||||
type = ovl_path_real(dentry, &realpath);
|
||||
err = vfs_getattr(&realpath, stat);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
stat->dev = dentry->d_sb->s_dev;
|
||||
stat->ino = dentry->d_inode->i_ino;
|
||||
|
||||
/*
|
||||
* It's probably not worth it to count subdirs to get the
|
||||
* correct link count. nlink=1 seems to pacify 'find' and
|
||||
* other utilities.
|
||||
*/
|
||||
if (OVL_TYPE_MERGE(type))
|
||||
stat->nlink = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
|
||||
struct kstat *stat, const char *link,
|
||||
struct dentry *hardlink)
|
||||
{
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct dentry *newdentry;
|
||||
int err;
|
||||
|
||||
mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT);
|
||||
newdentry = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out_unlock;
|
||||
err = ovl_create_real(udir, newdentry, stat, link, hardlink, false);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
|
||||
ovl_dentry_version_inc(dentry->d_parent);
|
||||
ovl_dentry_update(dentry, newdentry);
|
||||
ovl_copyattr(newdentry->d_inode, inode);
|
||||
d_instantiate(dentry, inode);
|
||||
newdentry = NULL;
|
||||
out_dput:
|
||||
dput(newdentry);
|
||||
out_unlock:
|
||||
mutex_unlock(&udir->i_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_lock_rename_workdir(struct dentry *workdir,
|
||||
struct dentry *upperdir)
|
||||
{
|
||||
/* Workdir should not be the same as upperdir */
|
||||
if (workdir == upperdir)
|
||||
goto err;
|
||||
|
||||
/* Workdir should not be subdir of upperdir and vice versa */
|
||||
if (lock_rename(workdir, upperdir) != NULL)
|
||||
goto err_unlock;
|
||||
|
||||
return 0;
|
||||
|
||||
err_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
err:
|
||||
pr_err("overlayfs: failed to lock workdir+upperdir\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static struct dentry *ovl_clear_empty(struct dentry *dentry,
|
||||
struct list_head *list)
|
||||
{
|
||||
struct dentry *workdir = ovl_workdir(dentry);
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct path upperpath;
|
||||
struct dentry *upper;
|
||||
struct dentry *opaquedir;
|
||||
struct kstat stat;
|
||||
int err;
|
||||
|
||||
if (WARN_ON(!workdir))
|
||||
return ERR_PTR(-EROFS);
|
||||
|
||||
err = ovl_lock_rename_workdir(workdir, upperdir);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
ovl_path_upper(dentry, &upperpath);
|
||||
err = vfs_getattr(&upperpath, &stat);
|
||||
if (err)
|
||||
goto out_unlock;
|
||||
|
||||
err = -ESTALE;
|
||||
if (!S_ISDIR(stat.mode))
|
||||
goto out_unlock;
|
||||
upper = upperpath.dentry;
|
||||
if (upper->d_parent->d_inode != udir)
|
||||
goto out_unlock;
|
||||
|
||||
opaquedir = ovl_lookup_temp(workdir, dentry);
|
||||
err = PTR_ERR(opaquedir);
|
||||
if (IS_ERR(opaquedir))
|
||||
goto out_unlock;
|
||||
|
||||
err = ovl_create_real(wdir, opaquedir, &stat, NULL, NULL, true);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
|
||||
err = ovl_copy_xattr(upper, opaquedir);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_set_opaque(opaquedir);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
mutex_lock(&opaquedir->d_inode->i_mutex);
|
||||
err = ovl_set_attr(opaquedir, &stat);
|
||||
mutex_unlock(&opaquedir->d_inode->i_mutex);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
ovl_cleanup_whiteouts(upper, list);
|
||||
ovl_cleanup(wdir, upper);
|
||||
unlock_rename(workdir, upperdir);
|
||||
|
||||
/* dentry's upper doesn't match now, get rid of it */
|
||||
d_drop(dentry);
|
||||
|
||||
return opaquedir;
|
||||
|
||||
out_cleanup:
|
||||
ovl_cleanup(wdir, opaquedir);
|
||||
out_dput:
|
||||
dput(opaquedir);
|
||||
out_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
out:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
|
||||
{
|
||||
int err;
|
||||
struct dentry *ret = NULL;
|
||||
LIST_HEAD(list);
|
||||
|
||||
err = ovl_check_empty_dir(dentry, &list);
|
||||
if (err)
|
||||
ret = ERR_PTR(err);
|
||||
else {
|
||||
/*
|
||||
* If no upperdentry then skip clearing whiteouts.
|
||||
*
|
||||
* Can race with copy-up, since we don't hold the upperdir
|
||||
* mutex. Doesn't matter, since copy-up can't create a
|
||||
* non-empty directory from an empty one.
|
||||
*/
|
||||
if (ovl_dentry_upper(dentry))
|
||||
ret = ovl_clear_empty(dentry, &list);
|
||||
}
|
||||
|
||||
ovl_cache_free(&list);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
|
||||
struct kstat *stat, const char *link,
|
||||
struct dentry *hardlink)
|
||||
{
|
||||
struct dentry *workdir = ovl_workdir(dentry);
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct dentry *upper;
|
||||
struct dentry *newdentry;
|
||||
int err;
|
||||
|
||||
if (WARN_ON(!workdir))
|
||||
return -EROFS;
|
||||
|
||||
err = ovl_lock_rename_workdir(workdir, upperdir);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
newdentry = ovl_lookup_temp(workdir, dentry);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out_unlock;
|
||||
|
||||
upper = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(upper);
|
||||
if (IS_ERR(upper))
|
||||
goto out_dput;
|
||||
|
||||
err = ovl_create_real(wdir, newdentry, stat, link, hardlink, true);
|
||||
if (err)
|
||||
goto out_dput2;
|
||||
|
||||
if (S_ISDIR(stat->mode)) {
|
||||
err = ovl_set_opaque(newdentry);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_do_rename(wdir, newdentry, udir, upper,
|
||||
RENAME_EXCHANGE);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
ovl_cleanup(wdir, upper);
|
||||
} else {
|
||||
err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
}
|
||||
ovl_dentry_version_inc(dentry->d_parent);
|
||||
ovl_dentry_update(dentry, newdentry);
|
||||
ovl_copyattr(newdentry->d_inode, inode);
|
||||
d_instantiate(dentry, inode);
|
||||
newdentry = NULL;
|
||||
out_dput2:
|
||||
dput(upper);
|
||||
out_dput:
|
||||
dput(newdentry);
|
||||
out_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
out:
|
||||
return err;
|
||||
|
||||
out_cleanup:
|
||||
ovl_cleanup(wdir, newdentry);
|
||||
goto out_dput2;
|
||||
}
|
||||
|
||||
static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev,
|
||||
const char *link, struct dentry *hardlink)
|
||||
{
|
||||
int err;
|
||||
struct inode *inode;
|
||||
struct kstat stat = {
|
||||
.mode = mode,
|
||||
.rdev = rdev,
|
||||
};
|
||||
|
||||
err = -ENOMEM;
|
||||
inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata);
|
||||
if (!inode)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(dentry->d_parent);
|
||||
if (err)
|
||||
goto out_iput;
|
||||
|
||||
if (!ovl_dentry_is_opaque(dentry)) {
|
||||
err = ovl_create_upper(dentry, inode, &stat, link, hardlink);
|
||||
} else {
|
||||
const struct cred *old_cred;
|
||||
struct cred *override_cred;
|
||||
|
||||
err = -ENOMEM;
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
goto out_iput;
|
||||
|
||||
/*
|
||||
* CAP_SYS_ADMIN for setting opaque xattr
|
||||
* CAP_DAC_OVERRIDE for create in workdir, rename
|
||||
* CAP_FOWNER for removing whiteout from sticky dir
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
cap_raise(override_cred->cap_effective, CAP_FOWNER);
|
||||
old_cred = override_creds(override_cred);
|
||||
|
||||
err = ovl_create_over_whiteout(dentry, inode, &stat, link,
|
||||
hardlink);
|
||||
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
}
|
||||
|
||||
if (!err)
|
||||
inode = NULL;
|
||||
out_iput:
|
||||
iput(inode);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
|
||||
const char *link)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (!err) {
|
||||
err = ovl_create_or_link(dentry, mode, rdev, link, NULL);
|
||||
ovl_drop_write(dentry);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
bool excl)
|
||||
{
|
||||
return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
|
||||
}
|
||||
|
||||
static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
|
||||
{
|
||||
return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
|
||||
}
|
||||
|
||||
static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
dev_t rdev)
|
||||
{
|
||||
/* Don't allow creation of "whiteout" on overlay */
|
||||
if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
|
||||
return -EPERM;
|
||||
|
||||
return ovl_create_object(dentry, mode, rdev, NULL);
|
||||
}
|
||||
|
||||
static int ovl_symlink(struct inode *dir, struct dentry *dentry,
|
||||
const char *link)
|
||||
{
|
||||
return ovl_create_object(dentry, S_IFLNK, 0, link);
|
||||
}
|
||||
|
||||
static int ovl_link(struct dentry *old, struct inode *newdir,
|
||||
struct dentry *new)
|
||||
{
|
||||
int err;
|
||||
struct dentry *upper;
|
||||
|
||||
err = ovl_want_write(old);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(old);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
upper = ovl_dentry_upper(old);
|
||||
err = ovl_create_or_link(new, upper->d_inode->i_mode, 0, NULL, upper);
|
||||
|
||||
out_drop_write:
|
||||
ovl_drop_write(old);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
|
||||
{
|
||||
struct dentry *workdir = ovl_workdir(dentry);
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct dentry *whiteout;
|
||||
struct dentry *upper;
|
||||
struct dentry *opaquedir = NULL;
|
||||
int err;
|
||||
int flags = 0;
|
||||
|
||||
if (WARN_ON(!workdir))
|
||||
return -EROFS;
|
||||
|
||||
if (is_dir) {
|
||||
if (OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) {
|
||||
opaquedir = ovl_check_empty_and_clear(dentry);
|
||||
err = PTR_ERR(opaquedir);
|
||||
if (IS_ERR(opaquedir))
|
||||
goto out;
|
||||
} else {
|
||||
LIST_HEAD(list);
|
||||
|
||||
/*
|
||||
* When removing an empty opaque directory, then it
|
||||
* makes no sense to replace it with an exact replica of
|
||||
* itself. But emptiness still needs to be checked.
|
||||
*/
|
||||
err = ovl_check_empty_dir(dentry, &list);
|
||||
ovl_cache_free(&list);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
err = ovl_lock_rename_workdir(workdir, upperdir);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
|
||||
upper = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(upper);
|
||||
if (IS_ERR(upper))
|
||||
goto out_unlock;
|
||||
|
||||
err = -ESTALE;
|
||||
if ((opaquedir && upper != opaquedir) ||
|
||||
(!opaquedir && ovl_dentry_upper(dentry) &&
|
||||
upper != ovl_dentry_upper(dentry))) {
|
||||
goto out_dput_upper;
|
||||
}
|
||||
|
||||
whiteout = ovl_whiteout(workdir, dentry);
|
||||
err = PTR_ERR(whiteout);
|
||||
if (IS_ERR(whiteout))
|
||||
goto out_dput_upper;
|
||||
|
||||
if (d_is_dir(upper))
|
||||
flags = RENAME_EXCHANGE;
|
||||
|
||||
err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
|
||||
if (err)
|
||||
goto kill_whiteout;
|
||||
if (flags)
|
||||
ovl_cleanup(wdir, upper);
|
||||
|
||||
ovl_dentry_version_inc(dentry->d_parent);
|
||||
out_d_drop:
|
||||
d_drop(dentry);
|
||||
dput(whiteout);
|
||||
out_dput_upper:
|
||||
dput(upper);
|
||||
out_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
out_dput:
|
||||
dput(opaquedir);
|
||||
out:
|
||||
return err;
|
||||
|
||||
kill_whiteout:
|
||||
ovl_cleanup(wdir, whiteout);
|
||||
goto out_d_drop;
|
||||
}
|
||||
|
||||
static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
|
||||
{
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *dir = upperdir->d_inode;
|
||||
struct dentry *upper;
|
||||
int err;
|
||||
|
||||
mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
|
||||
upper = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(upper);
|
||||
if (IS_ERR(upper))
|
||||
goto out_unlock;
|
||||
|
||||
err = -ESTALE;
|
||||
if (upper == ovl_dentry_upper(dentry)) {
|
||||
if (is_dir)
|
||||
err = vfs_rmdir(dir, upper);
|
||||
else
|
||||
err = vfs_unlink(dir, upper, NULL);
|
||||
ovl_dentry_version_inc(dentry->d_parent);
|
||||
}
|
||||
dput(upper);
|
||||
|
||||
/*
|
||||
* Keeping this dentry hashed would mean having to release
|
||||
* upperpath/lowerpath, which could only be done if we are the
|
||||
* sole user of this dentry. Too tricky... Just unhash for
|
||||
* now.
|
||||
*/
|
||||
if (!err)
|
||||
d_drop(dentry);
|
||||
out_unlock:
|
||||
mutex_unlock(&dir->i_mutex);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_check_sticky(struct dentry *dentry)
|
||||
{
|
||||
struct inode *dir = ovl_dentry_real(dentry->d_parent)->d_inode;
|
||||
struct inode *inode = ovl_dentry_real(dentry)->d_inode;
|
||||
|
||||
if (check_sticky(dir, inode))
|
||||
return -EPERM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
|
||||
{
|
||||
enum ovl_path_type type;
|
||||
int err;
|
||||
|
||||
err = ovl_check_sticky(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(dentry->d_parent);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
type = ovl_path_type(dentry);
|
||||
if (OVL_TYPE_PURE_UPPER(type)) {
|
||||
err = ovl_remove_upper(dentry, is_dir);
|
||||
} else {
|
||||
const struct cred *old_cred;
|
||||
struct cred *override_cred;
|
||||
|
||||
err = -ENOMEM;
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
goto out_drop_write;
|
||||
|
||||
/*
|
||||
* CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
|
||||
* CAP_DAC_OVERRIDE for create in workdir, rename
|
||||
* CAP_FOWNER for removing whiteout from sticky dir
|
||||
* CAP_FSETID for chmod of opaque dir
|
||||
* CAP_CHOWN for chown of opaque dir
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
cap_raise(override_cred->cap_effective, CAP_FOWNER);
|
||||
cap_raise(override_cred->cap_effective, CAP_FSETID);
|
||||
cap_raise(override_cred->cap_effective, CAP_CHOWN);
|
||||
old_cred = override_creds(override_cred);
|
||||
|
||||
err = ovl_remove_and_whiteout(dentry, is_dir);
|
||||
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
}
|
||||
out_drop_write:
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_unlink(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
return ovl_do_remove(dentry, false);
|
||||
}
|
||||
|
||||
static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
return ovl_do_remove(dentry, true);
|
||||
}
|
||||
|
||||
static int ovl_rename2(struct inode *olddir, struct dentry *old,
|
||||
struct inode *newdir, struct dentry *new,
|
||||
unsigned int flags)
|
||||
{
|
||||
int err;
|
||||
enum ovl_path_type old_type;
|
||||
enum ovl_path_type new_type;
|
||||
struct dentry *old_upperdir;
|
||||
struct dentry *new_upperdir;
|
||||
struct dentry *olddentry;
|
||||
struct dentry *newdentry;
|
||||
struct dentry *trap;
|
||||
bool old_opaque;
|
||||
bool new_opaque;
|
||||
bool new_create = false;
|
||||
bool cleanup_whiteout = false;
|
||||
bool overwrite = !(flags & RENAME_EXCHANGE);
|
||||
bool is_dir = S_ISDIR(old->d_inode->i_mode);
|
||||
bool new_is_dir = false;
|
||||
struct dentry *opaquedir = NULL;
|
||||
const struct cred *old_cred = NULL;
|
||||
struct cred *override_cred = NULL;
|
||||
|
||||
err = -EINVAL;
|
||||
if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
|
||||
goto out;
|
||||
|
||||
flags &= ~RENAME_NOREPLACE;
|
||||
|
||||
err = ovl_check_sticky(old);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
/* Don't copy up directory trees */
|
||||
old_type = ovl_path_type(old);
|
||||
err = -EXDEV;
|
||||
if (OVL_TYPE_MERGE_OR_LOWER(old_type) && is_dir)
|
||||
goto out;
|
||||
|
||||
if (new->d_inode) {
|
||||
err = ovl_check_sticky(new);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (S_ISDIR(new->d_inode->i_mode))
|
||||
new_is_dir = true;
|
||||
|
||||
new_type = ovl_path_type(new);
|
||||
err = -EXDEV;
|
||||
if (!overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir)
|
||||
goto out;
|
||||
|
||||
err = 0;
|
||||
if (!OVL_TYPE_UPPER(new_type) && !OVL_TYPE_UPPER(old_type)) {
|
||||
if (ovl_dentry_lower(old)->d_inode ==
|
||||
ovl_dentry_lower(new)->d_inode)
|
||||
goto out;
|
||||
}
|
||||
if (OVL_TYPE_UPPER(new_type) && OVL_TYPE_UPPER(old_type)) {
|
||||
if (ovl_dentry_upper(old)->d_inode ==
|
||||
ovl_dentry_upper(new)->d_inode)
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
if (ovl_dentry_is_opaque(new))
|
||||
new_type = __OVL_PATH_UPPER;
|
||||
else
|
||||
new_type = __OVL_PATH_UPPER | __OVL_PATH_PURE;
|
||||
}
|
||||
|
||||
err = ovl_want_write(old);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(old);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
err = ovl_copy_up(new->d_parent);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
if (!overwrite) {
|
||||
err = ovl_copy_up(new);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
}
|
||||
|
||||
old_opaque = !OVL_TYPE_PURE_UPPER(old_type);
|
||||
new_opaque = !OVL_TYPE_PURE_UPPER(new_type);
|
||||
|
||||
if (old_opaque || new_opaque) {
|
||||
err = -ENOMEM;
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
goto out_drop_write;
|
||||
|
||||
/*
|
||||
* CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
|
||||
* CAP_DAC_OVERRIDE for create in workdir
|
||||
* CAP_FOWNER for removing whiteout from sticky dir
|
||||
* CAP_FSETID for chmod of opaque dir
|
||||
* CAP_CHOWN for chown of opaque dir
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
cap_raise(override_cred->cap_effective, CAP_FOWNER);
|
||||
cap_raise(override_cred->cap_effective, CAP_FSETID);
|
||||
cap_raise(override_cred->cap_effective, CAP_CHOWN);
|
||||
old_cred = override_creds(override_cred);
|
||||
}
|
||||
|
||||
if (overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir) {
|
||||
opaquedir = ovl_check_empty_and_clear(new);
|
||||
err = PTR_ERR(opaquedir);
|
||||
if (IS_ERR(opaquedir)) {
|
||||
opaquedir = NULL;
|
||||
goto out_revert_creds;
|
||||
}
|
||||
}
|
||||
|
||||
if (overwrite) {
|
||||
if (old_opaque) {
|
||||
if (new->d_inode || !new_opaque) {
|
||||
/* Whiteout source */
|
||||
flags |= RENAME_WHITEOUT;
|
||||
} else {
|
||||
/* Switch whiteouts */
|
||||
flags |= RENAME_EXCHANGE;
|
||||
}
|
||||
} else if (is_dir && !new->d_inode && new_opaque) {
|
||||
flags |= RENAME_EXCHANGE;
|
||||
cleanup_whiteout = true;
|
||||
}
|
||||
}
|
||||
|
||||
old_upperdir = ovl_dentry_upper(old->d_parent);
|
||||
new_upperdir = ovl_dentry_upper(new->d_parent);
|
||||
|
||||
trap = lock_rename(new_upperdir, old_upperdir);
|
||||
|
||||
|
||||
olddentry = lookup_one_len(old->d_name.name, old_upperdir,
|
||||
old->d_name.len);
|
||||
err = PTR_ERR(olddentry);
|
||||
if (IS_ERR(olddentry))
|
||||
goto out_unlock;
|
||||
|
||||
err = -ESTALE;
|
||||
if (olddentry != ovl_dentry_upper(old))
|
||||
goto out_dput_old;
|
||||
|
||||
newdentry = lookup_one_len(new->d_name.name, new_upperdir,
|
||||
new->d_name.len);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out_dput_old;
|
||||
|
||||
err = -ESTALE;
|
||||
if (ovl_dentry_upper(new)) {
|
||||
if (opaquedir) {
|
||||
if (newdentry != opaquedir)
|
||||
goto out_dput;
|
||||
} else {
|
||||
if (newdentry != ovl_dentry_upper(new))
|
||||
goto out_dput;
|
||||
}
|
||||
} else {
|
||||
new_create = true;
|
||||
if (!d_is_negative(newdentry) &&
|
||||
(!new_opaque || !ovl_is_whiteout(newdentry)))
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
if (olddentry == trap)
|
||||
goto out_dput;
|
||||
if (newdentry == trap)
|
||||
goto out_dput;
|
||||
|
||||
if (is_dir && !old_opaque && new_opaque) {
|
||||
err = ovl_set_opaque(olddentry);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
}
|
||||
if (!overwrite && new_is_dir && old_opaque && !new_opaque) {
|
||||
err = ovl_set_opaque(newdentry);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
if (old_opaque || new_opaque) {
|
||||
err = ovl_do_rename(old_upperdir->d_inode, olddentry,
|
||||
new_upperdir->d_inode, newdentry,
|
||||
flags);
|
||||
} else {
|
||||
/* No debug for the plain case */
|
||||
BUG_ON(flags & ~RENAME_EXCHANGE);
|
||||
err = vfs_rename(old_upperdir->d_inode, olddentry,
|
||||
new_upperdir->d_inode, newdentry,
|
||||
NULL, flags);
|
||||
}
|
||||
|
||||
if (err) {
|
||||
if (is_dir && !old_opaque && new_opaque)
|
||||
ovl_remove_opaque(olddentry);
|
||||
if (!overwrite && new_is_dir && old_opaque && !new_opaque)
|
||||
ovl_remove_opaque(newdentry);
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
if (is_dir && old_opaque && !new_opaque)
|
||||
ovl_remove_opaque(olddentry);
|
||||
if (!overwrite && new_is_dir && !old_opaque && new_opaque)
|
||||
ovl_remove_opaque(newdentry);
|
||||
|
||||
if (old_opaque != new_opaque) {
|
||||
ovl_dentry_set_opaque(old, new_opaque);
|
||||
if (!overwrite)
|
||||
ovl_dentry_set_opaque(new, old_opaque);
|
||||
}
|
||||
|
||||
if (cleanup_whiteout)
|
||||
ovl_cleanup(old_upperdir->d_inode, newdentry);
|
||||
|
||||
ovl_dentry_version_inc(old->d_parent);
|
||||
ovl_dentry_version_inc(new->d_parent);
|
||||
|
||||
out_dput:
|
||||
dput(newdentry);
|
||||
out_dput_old:
|
||||
dput(olddentry);
|
||||
out_unlock:
|
||||
unlock_rename(new_upperdir, old_upperdir);
|
||||
out_revert_creds:
|
||||
if (old_opaque || new_opaque) {
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
}
|
||||
out_drop_write:
|
||||
ovl_drop_write(old);
|
||||
out:
|
||||
dput(opaquedir);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_rename(struct inode *olddir, struct dentry *old,
|
||||
struct inode *newdir, struct dentry *new)
|
||||
{
|
||||
return ovl_rename2(olddir, old, newdir, new, 0);
|
||||
}
|
||||
|
||||
const struct inode_operations_wrapper ovl_dir_inode_operations = {
|
||||
.ops = {
|
||||
.lookup = ovl_lookup,
|
||||
.mkdir = ovl_mkdir,
|
||||
.symlink = ovl_symlink,
|
||||
.unlink = ovl_unlink,
|
||||
.rmdir = ovl_rmdir,
|
||||
.rename = ovl_rename,
|
||||
.link = ovl_link,
|
||||
.setattr = ovl_setattr,
|
||||
.create = ovl_create,
|
||||
.mknod = ovl_mknod,
|
||||
.permission = ovl_permission,
|
||||
.getattr = ovl_dir_getattr,
|
||||
.setxattr = ovl_setxattr,
|
||||
.getxattr = ovl_getxattr,
|
||||
.listxattr = ovl_listxattr,
|
||||
.removexattr = ovl_removexattr,
|
||||
},
|
||||
.rename2 = ovl_rename2,
|
||||
};
|
||||
442
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/inode.c
Normal file
442
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/inode.c
Normal file
@ -0,0 +1,442 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/xattr.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr,
|
||||
bool no_data)
|
||||
{
|
||||
int err;
|
||||
struct dentry *parent;
|
||||
struct kstat stat;
|
||||
struct path lowerpath;
|
||||
|
||||
parent = dget_parent(dentry);
|
||||
err = ovl_copy_up(parent);
|
||||
if (err)
|
||||
goto out_dput_parent;
|
||||
|
||||
ovl_path_lower(dentry, &lowerpath);
|
||||
err = vfs_getattr(&lowerpath, &stat);
|
||||
if (err)
|
||||
goto out_dput_parent;
|
||||
|
||||
if (no_data)
|
||||
stat.size = 0;
|
||||
|
||||
err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr);
|
||||
|
||||
out_dput_parent:
|
||||
dput(parent);
|
||||
return err;
|
||||
}
|
||||
|
||||
int ovl_setattr(struct dentry *dentry, struct iattr *attr)
|
||||
{
|
||||
int err;
|
||||
struct dentry *upperdentry;
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(dentry);
|
||||
if (!err) {
|
||||
upperdentry = ovl_dentry_upper(dentry);
|
||||
|
||||
mutex_lock(&upperdentry->d_inode->i_mutex);
|
||||
err = notify_change(upperdentry, attr, NULL);
|
||||
mutex_unlock(&upperdentry->d_inode->i_mutex);
|
||||
}
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat)
|
||||
{
|
||||
struct path realpath;
|
||||
|
||||
ovl_path_real(dentry, &realpath);
|
||||
return vfs_getattr(&realpath, stat);
|
||||
}
|
||||
|
||||
int ovl_permission(struct inode *inode, int mask)
|
||||
{
|
||||
struct ovl_entry *oe;
|
||||
struct dentry *alias = NULL;
|
||||
struct inode *realinode;
|
||||
struct dentry *realdentry;
|
||||
bool is_upper;
|
||||
int err;
|
||||
|
||||
if (S_ISDIR(inode->i_mode)) {
|
||||
oe = inode->i_private;
|
||||
} else if (mask & MAY_NOT_BLOCK) {
|
||||
return -ECHILD;
|
||||
} else {
|
||||
/*
|
||||
* For non-directories find an alias and get the info
|
||||
* from there.
|
||||
*/
|
||||
alias = d_find_any_alias(inode);
|
||||
if (WARN_ON(!alias))
|
||||
return -ENOENT;
|
||||
|
||||
oe = alias->d_fsdata;
|
||||
}
|
||||
|
||||
realdentry = ovl_entry_real(oe, &is_upper);
|
||||
|
||||
/* Careful in RCU walk mode */
|
||||
realinode = ACCESS_ONCE(realdentry->d_inode);
|
||||
if (!realinode) {
|
||||
WARN_ON(!(mask & MAY_NOT_BLOCK));
|
||||
err = -ENOENT;
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
if (mask & MAY_WRITE) {
|
||||
umode_t mode = realinode->i_mode;
|
||||
|
||||
/*
|
||||
* Writes will always be redirected to upper layer, so
|
||||
* ignore lower layer being read-only.
|
||||
*
|
||||
* If the overlay itself is read-only then proceed
|
||||
* with the permission check, don't return EROFS.
|
||||
* This will only happen if this is the lower layer of
|
||||
* another overlayfs.
|
||||
*
|
||||
* If upper fs becomes read-only after the overlay was
|
||||
* constructed return EROFS to prevent modification of
|
||||
* upper layer.
|
||||
*/
|
||||
err = -EROFS;
|
||||
if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) &&
|
||||
(S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
err = __inode_permission(realinode, mask);
|
||||
out_dput:
|
||||
dput(alias);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
struct ovl_link_data {
|
||||
struct dentry *realdentry;
|
||||
void *cookie;
|
||||
};
|
||||
|
||||
static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
|
||||
{
|
||||
void *ret;
|
||||
struct dentry *realdentry;
|
||||
struct inode *realinode;
|
||||
struct ovl_link_data *data = NULL;
|
||||
|
||||
realdentry = ovl_dentry_real(dentry);
|
||||
realinode = realdentry->d_inode;
|
||||
|
||||
if (WARN_ON(!realinode->i_op->follow_link))
|
||||
return ERR_PTR(-EPERM);
|
||||
|
||||
if (realinode->i_op->put_link) {
|
||||
data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
|
||||
if (!data)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
data->realdentry = realdentry;
|
||||
}
|
||||
|
||||
ret = realinode->i_op->follow_link(realdentry, nd);
|
||||
if (IS_ERR(ret)) {
|
||||
kfree(data);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (data)
|
||||
data->cookie = ret;
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
|
||||
{
|
||||
struct inode *realinode;
|
||||
struct ovl_link_data *data = c;
|
||||
|
||||
if (!data)
|
||||
return;
|
||||
|
||||
realinode = data->realdentry->d_inode;
|
||||
realinode->i_op->put_link(data->realdentry, nd, data->cookie);
|
||||
kfree(data);
|
||||
}
|
||||
|
||||
static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
|
||||
{
|
||||
struct path realpath;
|
||||
struct inode *realinode;
|
||||
|
||||
ovl_path_real(dentry, &realpath);
|
||||
realinode = realpath.dentry->d_inode;
|
||||
|
||||
if (!realinode->i_op->readlink)
|
||||
return -EINVAL;
|
||||
|
||||
touch_atime(&realpath);
|
||||
|
||||
return realinode->i_op->readlink(realpath.dentry, buf, bufsiz);
|
||||
}
|
||||
|
||||
|
||||
static bool ovl_is_private_xattr(const char *name)
|
||||
{
|
||||
return strncmp(name, OVL_XATTR_PRE_NAME, OVL_XATTR_PRE_LEN) == 0;
|
||||
}
|
||||
|
||||
int ovl_setxattr(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags)
|
||||
{
|
||||
int err;
|
||||
struct dentry *upperdentry;
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = -EPERM;
|
||||
if (ovl_is_private_xattr(name))
|
||||
goto out_drop_write;
|
||||
|
||||
err = ovl_copy_up(dentry);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
upperdentry = ovl_dentry_upper(dentry);
|
||||
err = vfs_setxattr(upperdentry, name, value, size, flags);
|
||||
|
||||
out_drop_write:
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static bool ovl_need_xattr_filter(struct dentry *dentry,
|
||||
enum ovl_path_type type)
|
||||
{
|
||||
if ((type & (__OVL_PATH_PURE | __OVL_PATH_UPPER)) == __OVL_PATH_UPPER)
|
||||
return S_ISDIR(dentry->d_inode->i_mode);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
|
||||
void *value, size_t size)
|
||||
{
|
||||
struct path realpath;
|
||||
enum ovl_path_type type = ovl_path_real(dentry, &realpath);
|
||||
|
||||
if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
|
||||
return -ENODATA;
|
||||
|
||||
return vfs_getxattr(realpath.dentry, name, value, size);
|
||||
}
|
||||
|
||||
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
|
||||
{
|
||||
struct path realpath;
|
||||
enum ovl_path_type type = ovl_path_real(dentry, &realpath);
|
||||
ssize_t res;
|
||||
int off;
|
||||
|
||||
res = vfs_listxattr(realpath.dentry, list, size);
|
||||
if (res <= 0 || size == 0)
|
||||
return res;
|
||||
|
||||
if (!ovl_need_xattr_filter(dentry, type))
|
||||
return res;
|
||||
|
||||
/* filter out private xattrs */
|
||||
for (off = 0; off < res;) {
|
||||
char *s = list + off;
|
||||
size_t slen = strlen(s) + 1;
|
||||
|
||||
BUG_ON(off + slen > res);
|
||||
|
||||
if (ovl_is_private_xattr(s)) {
|
||||
res -= slen;
|
||||
memmove(s, s + slen, res - off);
|
||||
} else {
|
||||
off += slen;
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
int ovl_removexattr(struct dentry *dentry, const char *name)
|
||||
{
|
||||
int err;
|
||||
struct path realpath;
|
||||
enum ovl_path_type type = ovl_path_real(dentry, &realpath);
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = -ENODATA;
|
||||
if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
|
||||
goto out_drop_write;
|
||||
|
||||
if (!OVL_TYPE_UPPER(type)) {
|
||||
err = vfs_getxattr(realpath.dentry, name, NULL, 0);
|
||||
if (err < 0)
|
||||
goto out_drop_write;
|
||||
|
||||
err = ovl_copy_up(dentry);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
ovl_path_upper(dentry, &realpath);
|
||||
}
|
||||
|
||||
err = vfs_removexattr(realpath.dentry, name);
|
||||
out_drop_write:
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
|
||||
struct dentry *realdentry)
|
||||
{
|
||||
if (OVL_TYPE_UPPER(type))
|
||||
return false;
|
||||
|
||||
if (special_file(realdentry->d_inode->i_mode))
|
||||
return false;
|
||||
|
||||
if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int ovl_dentry_open(struct dentry *dentry, struct file *file,
|
||||
const struct cred *cred)
|
||||
{
|
||||
int err;
|
||||
struct path realpath;
|
||||
enum ovl_path_type type;
|
||||
bool want_write = false;
|
||||
|
||||
type = ovl_path_real(dentry, &realpath);
|
||||
if (!ovl_is_nocopyupw(dentry)) {
|
||||
if (ovl_open_need_copy_up(file->f_flags, type,
|
||||
realpath.dentry)) {
|
||||
want_write = true;
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (file->f_flags & O_TRUNC)
|
||||
err = ovl_copy_up_last(dentry, NULL, true);
|
||||
else
|
||||
err = ovl_copy_up(dentry);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
ovl_path_upper(dentry, &realpath);
|
||||
}
|
||||
}
|
||||
|
||||
err = vfs_open(&realpath, file, cred);
|
||||
out_drop_write:
|
||||
if (want_write)
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static const struct inode_operations_wrapper ovl_file_inode_operations = {
|
||||
.ops = {
|
||||
.setattr = ovl_setattr,
|
||||
.permission = ovl_permission,
|
||||
.getattr = ovl_getattr,
|
||||
.setxattr = ovl_setxattr,
|
||||
.getxattr = ovl_getxattr,
|
||||
.listxattr = ovl_listxattr,
|
||||
.removexattr = ovl_removexattr,
|
||||
},
|
||||
.dentry_open = ovl_dentry_open,
|
||||
};
|
||||
|
||||
static const struct inode_operations ovl_symlink_inode_operations = {
|
||||
.setattr = ovl_setattr,
|
||||
.follow_link = ovl_follow_link,
|
||||
.put_link = ovl_put_link,
|
||||
.readlink = ovl_readlink,
|
||||
.getattr = ovl_getattr,
|
||||
.setxattr = ovl_setxattr,
|
||||
.getxattr = ovl_getxattr,
|
||||
.listxattr = ovl_listxattr,
|
||||
.removexattr = ovl_removexattr,
|
||||
};
|
||||
|
||||
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
|
||||
struct ovl_entry *oe)
|
||||
{
|
||||
struct inode *inode;
|
||||
|
||||
inode = new_inode(sb);
|
||||
if (!inode)
|
||||
return NULL;
|
||||
|
||||
mode &= S_IFMT;
|
||||
|
||||
inode->i_ino = get_next_ino();
|
||||
inode->i_mode = mode;
|
||||
inode->i_flags |= S_NOATIME | S_NOCMTIME;
|
||||
|
||||
switch (mode) {
|
||||
case S_IFDIR:
|
||||
inode->i_private = oe;
|
||||
inode->i_op = &ovl_dir_inode_operations.ops;
|
||||
inode->i_fop = &ovl_dir_operations;
|
||||
inode->i_flags |= S_IOPS_WRAPPER;
|
||||
break;
|
||||
|
||||
case S_IFLNK:
|
||||
inode->i_op = &ovl_symlink_inode_operations;
|
||||
break;
|
||||
|
||||
case S_IFREG:
|
||||
case S_IFSOCK:
|
||||
case S_IFBLK:
|
||||
case S_IFCHR:
|
||||
case S_IFIFO:
|
||||
inode->i_op = &ovl_file_inode_operations.ops;
|
||||
inode->i_flags |= S_IOPS_WRAPPER;
|
||||
break;
|
||||
|
||||
default:
|
||||
WARN(1, "illegal file type: %i\n", mode);
|
||||
iput(inode);
|
||||
inode = NULL;
|
||||
}
|
||||
|
||||
return inode;
|
||||
}
|
||||
@ -0,0 +1,200 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
|
||||
struct ovl_entry;
|
||||
|
||||
enum ovl_path_type {
|
||||
__OVL_PATH_PURE = (1 << 0),
|
||||
__OVL_PATH_UPPER = (1 << 1),
|
||||
__OVL_PATH_MERGE = (1 << 2),
|
||||
};
|
||||
|
||||
#define OVL_TYPE_UPPER(type) ((type) & __OVL_PATH_UPPER)
|
||||
#define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE)
|
||||
#define OVL_TYPE_PURE_UPPER(type) ((type) & __OVL_PATH_PURE)
|
||||
#define OVL_TYPE_MERGE_OR_LOWER(type) \
|
||||
(OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type))
|
||||
|
||||
#define OVL_XATTR_PRE_NAME "trusted.overlay."
|
||||
#define OVL_XATTR_PRE_LEN 16
|
||||
#define OVL_XATTR_OPAQUE OVL_XATTR_PRE_NAME"opaque"
|
||||
|
||||
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
int err = vfs_rmdir(dir, dentry);
|
||||
pr_debug("rmdir(%pd2) = %i\n", dentry, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
int err = vfs_unlink(dir, dentry, NULL);
|
||||
pr_debug("unlink(%pd2) = %i\n", dentry, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
|
||||
struct dentry *new_dentry, bool debug)
|
||||
{
|
||||
int err = vfs_link(old_dentry, dir, new_dentry, NULL);
|
||||
if (debug) {
|
||||
pr_debug("link(%pd2, %pd2) = %i\n",
|
||||
old_dentry, new_dentry, err);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
|
||||
umode_t mode, bool debug)
|
||||
{
|
||||
int err = vfs_create(dir, dentry, mode, true);
|
||||
if (debug)
|
||||
pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
|
||||
umode_t mode, bool debug)
|
||||
{
|
||||
int err = vfs_mkdir(dir, dentry, mode);
|
||||
if (debug)
|
||||
pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
|
||||
umode_t mode, dev_t dev, bool debug)
|
||||
{
|
||||
int err = vfs_mknod(dir, dentry, mode, dev);
|
||||
if (debug) {
|
||||
pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n",
|
||||
dentry, mode, dev, err);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
|
||||
const char *oldname, bool debug)
|
||||
{
|
||||
int err = vfs_symlink(dir, dentry, oldname);
|
||||
if (debug)
|
||||
pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags)
|
||||
{
|
||||
int err = vfs_setxattr(dentry, name, value, size, flags);
|
||||
pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n",
|
||||
dentry, name, (int) size, (char *) value, flags, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
|
||||
{
|
||||
int err = vfs_removexattr(dentry, name);
|
||||
pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
|
||||
struct inode *newdir, struct dentry *newdentry,
|
||||
unsigned int flags)
|
||||
{
|
||||
int err;
|
||||
|
||||
pr_debug("rename2(%pd2, %pd2, 0x%x)\n",
|
||||
olddentry, newdentry, flags);
|
||||
|
||||
err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);
|
||||
|
||||
if (err) {
|
||||
pr_debug("...rename2(%pd2, %pd2, ...) = %i\n",
|
||||
olddentry, newdentry, err);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
int err = vfs_whiteout(dir, dentry);
|
||||
pr_debug("whiteout(%pd2) = %i\n", dentry, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
bool ovl_is_nocopyupw(struct dentry *dentry);
|
||||
enum ovl_path_type ovl_path_type(struct dentry *dentry);
|
||||
u64 ovl_dentry_version_get(struct dentry *dentry);
|
||||
void ovl_dentry_version_inc(struct dentry *dentry);
|
||||
void ovl_path_upper(struct dentry *dentry, struct path *path);
|
||||
void ovl_path_lower(struct dentry *dentry, struct path *path);
|
||||
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
|
||||
int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
|
||||
struct dentry *ovl_dentry_upper(struct dentry *dentry);
|
||||
struct dentry *ovl_dentry_lower(struct dentry *dentry);
|
||||
struct dentry *ovl_dentry_real(struct dentry *dentry);
|
||||
struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper);
|
||||
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
|
||||
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
|
||||
struct dentry *ovl_workdir(struct dentry *dentry);
|
||||
int ovl_want_write(struct dentry *dentry);
|
||||
void ovl_drop_write(struct dentry *dentry);
|
||||
bool ovl_dentry_is_opaque(struct dentry *dentry);
|
||||
void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque);
|
||||
bool ovl_is_whiteout(struct dentry *dentry);
|
||||
void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
|
||||
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
|
||||
unsigned int flags);
|
||||
struct file *ovl_path_open(struct path *path, int flags);
|
||||
|
||||
struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry,
|
||||
struct kstat *stat, const char *link);
|
||||
|
||||
/* readdir.c */
|
||||
extern const struct file_operations ovl_dir_operations;
|
||||
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
|
||||
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
|
||||
void ovl_cache_free(struct list_head *list);
|
||||
|
||||
/* inode.c */
|
||||
int ovl_setattr(struct dentry *dentry, struct iattr *attr);
|
||||
int ovl_permission(struct inode *inode, int mask);
|
||||
int ovl_setxattr(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags);
|
||||
ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
|
||||
void *value, size_t size);
|
||||
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
|
||||
int ovl_removexattr(struct dentry *dentry, const char *name);
|
||||
|
||||
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
|
||||
struct ovl_entry *oe);
|
||||
static inline void ovl_copyattr(struct inode *from, struct inode *to)
|
||||
{
|
||||
to->i_uid = from->i_uid;
|
||||
to->i_gid = from->i_gid;
|
||||
}
|
||||
|
||||
/* dir.c */
|
||||
extern const struct inode_operations_wrapper ovl_dir_inode_operations;
|
||||
struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry);
|
||||
int ovl_create_real(struct inode *dir, struct dentry *newdentry,
|
||||
struct kstat *stat, const char *link,
|
||||
struct dentry *hardlink, bool debug);
|
||||
void ovl_cleanup(struct inode *dir, struct dentry *dentry);
|
||||
|
||||
/* copy_up.c */
|
||||
int ovl_copy_up(struct dentry *dentry);
|
||||
int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
|
||||
struct path *lowerpath, struct kstat *stat,
|
||||
struct iattr *attr);
|
||||
int ovl_copy_xattr(struct dentry *old, struct dentry *new);
|
||||
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
|
||||
588
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/readdir.c
Normal file
588
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/readdir.c
Normal file
@ -0,0 +1,588 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/cred.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
struct ovl_cache_entry {
|
||||
unsigned int len;
|
||||
unsigned int type;
|
||||
u64 ino;
|
||||
struct list_head l_node;
|
||||
struct rb_node node;
|
||||
struct ovl_cache_entry *next_maybe_whiteout;
|
||||
bool is_whiteout;
|
||||
char name[];
|
||||
};
|
||||
|
||||
struct ovl_dir_cache {
|
||||
long refcount;
|
||||
u64 version;
|
||||
struct list_head entries;
|
||||
};
|
||||
|
||||
struct dir_context {
|
||||
const filldir_t actor;
|
||||
//loff_t pos;
|
||||
};
|
||||
|
||||
struct ovl_readdir_data {
|
||||
struct dir_context ctx;
|
||||
bool is_merge;
|
||||
struct rb_root root;
|
||||
struct list_head *list;
|
||||
struct list_head middle;
|
||||
struct ovl_cache_entry *first_maybe_whiteout;
|
||||
int count;
|
||||
int err;
|
||||
};
|
||||
|
||||
struct ovl_dir_file {
|
||||
bool is_real;
|
||||
bool is_upper;
|
||||
struct ovl_dir_cache *cache;
|
||||
struct list_head *cursor;
|
||||
struct file *realfile;
|
||||
struct file *upperfile;
|
||||
};
|
||||
|
||||
static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
|
||||
{
|
||||
return container_of(n, struct ovl_cache_entry, node);
|
||||
}
|
||||
|
||||
static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
|
||||
const char *name, int len)
|
||||
{
|
||||
struct rb_node *node = root->rb_node;
|
||||
int cmp;
|
||||
|
||||
while (node) {
|
||||
struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
|
||||
|
||||
cmp = strncmp(name, p->name, len);
|
||||
if (cmp > 0)
|
||||
node = p->node.rb_right;
|
||||
else if (cmp < 0 || len < p->len)
|
||||
node = p->node.rb_left;
|
||||
else
|
||||
return p;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
|
||||
const char *name, int len,
|
||||
u64 ino, unsigned int d_type)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
|
||||
|
||||
p = kmalloc(size, GFP_KERNEL);
|
||||
if (!p)
|
||||
return NULL;
|
||||
|
||||
memcpy(p->name, name, len);
|
||||
p->name[len] = '\0';
|
||||
p->len = len;
|
||||
p->type = d_type;
|
||||
p->ino = ino;
|
||||
p->is_whiteout = false;
|
||||
|
||||
if (d_type == DT_CHR) {
|
||||
p->next_maybe_whiteout = rdd->first_maybe_whiteout;
|
||||
rdd->first_maybe_whiteout = p;
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
|
||||
const char *name, int len, u64 ino,
|
||||
unsigned int d_type)
|
||||
{
|
||||
struct rb_node **newp = &rdd->root.rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
while (*newp) {
|
||||
int cmp;
|
||||
struct ovl_cache_entry *tmp;
|
||||
|
||||
parent = *newp;
|
||||
tmp = ovl_cache_entry_from_node(*newp);
|
||||
cmp = strncmp(name, tmp->name, len);
|
||||
if (cmp > 0)
|
||||
newp = &tmp->node.rb_right;
|
||||
else if (cmp < 0 || len < tmp->len)
|
||||
newp = &tmp->node.rb_left;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
|
||||
if (p == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
list_add_tail(&p->l_node, rdd->list);
|
||||
rb_link_node(&p->node, parent, newp);
|
||||
rb_insert_color(&p->node, &rdd->root);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_fill_lower(struct ovl_readdir_data *rdd,
|
||||
const char *name, int namelen,
|
||||
loff_t offset, u64 ino, unsigned int d_type)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
p = ovl_cache_entry_find(&rdd->root, name, namelen);
|
||||
if (p) {
|
||||
list_move_tail(&p->l_node, &rdd->middle);
|
||||
} else {
|
||||
p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
|
||||
if (p == NULL)
|
||||
rdd->err = -ENOMEM;
|
||||
else
|
||||
list_add_tail(&p->l_node, &rdd->middle);
|
||||
}
|
||||
|
||||
return rdd->err;
|
||||
}
|
||||
|
||||
void ovl_cache_free(struct list_head *list)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
struct ovl_cache_entry *n;
|
||||
|
||||
list_for_each_entry_safe(p, n, list, l_node)
|
||||
kfree(p);
|
||||
|
||||
INIT_LIST_HEAD(list);
|
||||
}
|
||||
|
||||
static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
|
||||
{
|
||||
struct ovl_dir_cache *cache = od->cache;
|
||||
|
||||
WARN_ON(cache->refcount <= 0);
|
||||
cache->refcount--;
|
||||
if (!cache->refcount) {
|
||||
if (ovl_dir_cache(dentry) == cache)
|
||||
ovl_set_dir_cache(dentry, NULL);
|
||||
|
||||
ovl_cache_free(&cache->entries);
|
||||
kfree(cache);
|
||||
}
|
||||
}
|
||||
|
||||
static int ovl_fill_merge(void *buf, const char *name, int namelen,
|
||||
loff_t offset, u64 ino, unsigned int d_type)
|
||||
{
|
||||
struct dir_context *ctx = buf;
|
||||
struct ovl_readdir_data *rdd =
|
||||
container_of(ctx, struct ovl_readdir_data, ctx);
|
||||
|
||||
rdd->count++;
|
||||
if (!rdd->is_merge)
|
||||
return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
|
||||
else
|
||||
return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type);
|
||||
}
|
||||
|
||||
static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
|
||||
{
|
||||
int err;
|
||||
struct ovl_cache_entry *p;
|
||||
struct dentry *dentry;
|
||||
const struct cred *old_cred;
|
||||
struct cred *override_cred;
|
||||
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* CAP_DAC_OVERRIDE for lookup
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
old_cred = override_creds(override_cred);
|
||||
|
||||
err = mutex_lock_killable(&dir->d_inode->i_mutex);
|
||||
if (!err) {
|
||||
while (rdd->first_maybe_whiteout) {
|
||||
p = rdd->first_maybe_whiteout;
|
||||
rdd->first_maybe_whiteout = p->next_maybe_whiteout;
|
||||
dentry = lookup_one_len(p->name, dir, p->len);
|
||||
if (!IS_ERR(dentry)) {
|
||||
p->is_whiteout = ovl_is_whiteout(dentry);
|
||||
dput(dentry);
|
||||
}
|
||||
}
|
||||
mutex_unlock(&dir->d_inode->i_mutex);
|
||||
}
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_dir_read(struct path *realpath,
|
||||
struct ovl_readdir_data *rdd)
|
||||
{
|
||||
struct file *realfile;
|
||||
int err;
|
||||
|
||||
realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
|
||||
if (IS_ERR(realfile))
|
||||
return PTR_ERR(realfile);
|
||||
|
||||
rdd->first_maybe_whiteout = NULL;
|
||||
//rdd->ctx.pos = 0;
|
||||
do {
|
||||
rdd->count = 0;
|
||||
rdd->err = 0;
|
||||
err = vfs_readdir(realfile, rdd->ctx.actor, rdd);
|
||||
if (err >= 0)
|
||||
err = rdd->err;
|
||||
} while (!err && rdd->count);
|
||||
|
||||
if (!err && rdd->first_maybe_whiteout)
|
||||
err = ovl_check_whiteouts(realpath->dentry, rdd);
|
||||
|
||||
fput(realfile);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void ovl_dir_reset(struct file *file)
|
||||
{
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
struct ovl_dir_cache *cache = od->cache;
|
||||
struct dentry *dentry = file->f_path.dentry;
|
||||
enum ovl_path_type type = ovl_path_type(dentry);
|
||||
|
||||
if (cache && ovl_dentry_version_get(dentry) != cache->version) {
|
||||
ovl_cache_put(od, dentry);
|
||||
od->cache = NULL;
|
||||
od->cursor = NULL;
|
||||
}
|
||||
WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type));
|
||||
if (od->is_real && OVL_TYPE_MERGE(type))
|
||||
od->is_real = false;
|
||||
}
|
||||
|
||||
static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
|
||||
{
|
||||
int err;
|
||||
struct path realpath;
|
||||
struct ovl_readdir_data rdd = {
|
||||
.ctx.actor = ovl_fill_merge,
|
||||
.list = list,
|
||||
.root = RB_ROOT,
|
||||
.is_merge = false,
|
||||
};
|
||||
int idx, next;
|
||||
|
||||
for (idx = 0; idx != -1; idx = next) {
|
||||
next = ovl_path_next(idx, dentry, &realpath);
|
||||
|
||||
if (next != -1) {
|
||||
err = ovl_dir_read(&realpath, &rdd);
|
||||
if (err)
|
||||
break;
|
||||
} else {
|
||||
/*
|
||||
* Insert lowest layer entries before upper ones, this
|
||||
* allows offsets to be reasonably constant
|
||||
*/
|
||||
list_add(&rdd.middle, rdd.list);
|
||||
rdd.is_merge = true;
|
||||
err = ovl_dir_read(&realpath, &rdd);
|
||||
list_del(&rdd.middle);
|
||||
}
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
|
||||
{
|
||||
struct list_head *p;
|
||||
loff_t off = 0;
|
||||
|
||||
list_for_each(p, &od->cache->entries) {
|
||||
if (off >= pos)
|
||||
break;
|
||||
off++;
|
||||
}
|
||||
/* Cursor is safe since the cache is stable */
|
||||
od->cursor = p;
|
||||
}
|
||||
|
||||
static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
|
||||
{
|
||||
int res;
|
||||
struct ovl_dir_cache *cache;
|
||||
|
||||
cache = ovl_dir_cache(dentry);
|
||||
if (cache && ovl_dentry_version_get(dentry) == cache->version) {
|
||||
cache->refcount++;
|
||||
return cache;
|
||||
}
|
||||
ovl_set_dir_cache(dentry, NULL);
|
||||
|
||||
cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
|
||||
if (!cache)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
cache->refcount = 1;
|
||||
INIT_LIST_HEAD(&cache->entries);
|
||||
|
||||
res = ovl_dir_read_merged(dentry, &cache->entries);
|
||||
if (res) {
|
||||
ovl_cache_free(&cache->entries);
|
||||
kfree(cache);
|
||||
return ERR_PTR(res);
|
||||
}
|
||||
|
||||
cache->version = ovl_dentry_version_get(dentry);
|
||||
ovl_set_dir_cache(dentry, cache);
|
||||
|
||||
return cache;
|
||||
}
|
||||
|
||||
static int ovl_readdir(struct file *file, void *buf, filldir_t filler)
|
||||
{
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
struct dentry *dentry = file->f_path.dentry;
|
||||
struct ovl_cache_entry *p;
|
||||
int res;
|
||||
|
||||
if (!file->f_pos)
|
||||
ovl_dir_reset(file);
|
||||
|
||||
if (od->is_real) {
|
||||
res = vfs_readdir(od->realfile, filler, buf);
|
||||
file->f_pos = od->realfile->f_pos;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
if (!od->cache) {
|
||||
struct ovl_dir_cache *cache;
|
||||
|
||||
cache = ovl_cache_get(dentry);
|
||||
if (IS_ERR(cache))
|
||||
return PTR_ERR(cache);
|
||||
|
||||
od->cache = cache;
|
||||
ovl_seek_cursor(od, file->f_pos);
|
||||
}
|
||||
|
||||
while (od->cursor != &od->cache->entries) {
|
||||
p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
|
||||
if (!p->is_whiteout)
|
||||
if (filler(buf, p->name, p->len, file->f_pos, p->ino, p->type))
|
||||
break;
|
||||
od->cursor = p->l_node.next;
|
||||
file->f_pos++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
|
||||
{
|
||||
loff_t res;
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
|
||||
mutex_lock(&file_inode(file)->i_mutex);
|
||||
if (!file->f_pos)
|
||||
ovl_dir_reset(file);
|
||||
|
||||
if (od->is_real) {
|
||||
res = vfs_llseek(od->realfile, offset, origin);
|
||||
file->f_pos = od->realfile->f_pos;
|
||||
} else {
|
||||
res = -EINVAL;
|
||||
|
||||
switch (origin) {
|
||||
case SEEK_CUR:
|
||||
offset += file->f_pos;
|
||||
break;
|
||||
case SEEK_SET:
|
||||
break;
|
||||
default:
|
||||
goto out_unlock;
|
||||
}
|
||||
if (offset < 0)
|
||||
goto out_unlock;
|
||||
|
||||
if (offset != file->f_pos) {
|
||||
file->f_pos = offset;
|
||||
if (od->cache)
|
||||
ovl_seek_cursor(od, offset);
|
||||
}
|
||||
res = offset;
|
||||
}
|
||||
out_unlock:
|
||||
mutex_unlock(&file_inode(file)->i_mutex);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
|
||||
int datasync)
|
||||
{
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
struct dentry *dentry = file->f_path.dentry;
|
||||
struct file *realfile = od->realfile;
|
||||
|
||||
/*
|
||||
* Need to check if we started out being a lower dir, but got copied up
|
||||
*/
|
||||
if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) {
|
||||
struct inode *inode = file_inode(file);
|
||||
|
||||
realfile = lockless_dereference(od->upperfile);
|
||||
if (!realfile) {
|
||||
struct path upperpath;
|
||||
|
||||
ovl_path_upper(dentry, &upperpath);
|
||||
realfile = ovl_path_open(&upperpath, O_RDONLY);
|
||||
smp_mb__before_spinlock();
|
||||
mutex_lock(&inode->i_mutex);
|
||||
if (!od->upperfile) {
|
||||
if (IS_ERR(realfile)) {
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
return PTR_ERR(realfile);
|
||||
}
|
||||
od->upperfile = realfile;
|
||||
} else {
|
||||
/* somebody has beaten us to it */
|
||||
if (!IS_ERR(realfile))
|
||||
fput(realfile);
|
||||
realfile = od->upperfile;
|
||||
}
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
}
|
||||
}
|
||||
|
||||
return vfs_fsync_range(realfile, start, end, datasync);
|
||||
}
|
||||
|
||||
static int ovl_dir_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
|
||||
if (od->cache) {
|
||||
mutex_lock(&inode->i_mutex);
|
||||
ovl_cache_put(od, file->f_path.dentry);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
}
|
||||
fput(od->realfile);
|
||||
if (od->upperfile)
|
||||
fput(od->upperfile);
|
||||
kfree(od);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_dir_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct path realpath;
|
||||
struct file *realfile;
|
||||
struct ovl_dir_file *od;
|
||||
enum ovl_path_type type;
|
||||
|
||||
od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
|
||||
if (!od)
|
||||
return -ENOMEM;
|
||||
|
||||
type = ovl_path_real(file->f_path.dentry, &realpath);
|
||||
realfile = ovl_path_open(&realpath, file->f_flags);
|
||||
if (IS_ERR(realfile)) {
|
||||
kfree(od);
|
||||
return PTR_ERR(realfile);
|
||||
}
|
||||
od->realfile = realfile;
|
||||
od->is_real = !OVL_TYPE_MERGE(type);
|
||||
od->is_upper = OVL_TYPE_UPPER(type);
|
||||
file->private_data = od;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct file_operations ovl_dir_operations = {
|
||||
.read = generic_read_dir,
|
||||
.open = ovl_dir_open,
|
||||
.readdir = ovl_readdir,
|
||||
.llseek = ovl_dir_llseek,
|
||||
.fsync = ovl_dir_fsync,
|
||||
.release = ovl_dir_release,
|
||||
};
|
||||
|
||||
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
|
||||
{
|
||||
int err;
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
err = ovl_dir_read_merged(dentry, list);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = 0;
|
||||
|
||||
list_for_each_entry(p, list, l_node) {
|
||||
if (p->is_whiteout)
|
||||
continue;
|
||||
|
||||
if (p->name[0] == '.') {
|
||||
if (p->len == 1)
|
||||
continue;
|
||||
if (p->len == 2 && p->name[1] == '.')
|
||||
continue;
|
||||
}
|
||||
err = -ENOTEMPTY;
|
||||
break;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD);
|
||||
list_for_each_entry(p, list, l_node) {
|
||||
struct dentry *dentry;
|
||||
|
||||
if (!p->is_whiteout)
|
||||
continue;
|
||||
|
||||
dentry = lookup_one_len(p->name, upper, p->len);
|
||||
if (IS_ERR(dentry)) {
|
||||
pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
|
||||
upper->d_name.name, p->len, p->name,
|
||||
(int) PTR_ERR(dentry));
|
||||
continue;
|
||||
}
|
||||
ovl_cleanup(upper->d_inode, dentry);
|
||||
dput(dentry);
|
||||
}
|
||||
mutex_unlock(&upper->d_inode->i_mutex);
|
||||
}
|
||||
1203
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/super.c
Normal file
1203
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/super.c
Normal file
File diff suppressed because it is too large
Load Diff
21
executer/kernel/mcoverlayfs/linux-4.0.9/Makefile.in
Normal file
21
executer/kernel/mcoverlayfs/linux-4.0.9/Makefile.in
Normal file
@ -0,0 +1,21 @@
|
||||
KDIR ?= @KDIR@
|
||||
ARCH ?= @ARCH@
|
||||
KMODDIR = @KMODDIR@
|
||||
src = @abs_srcdir@
|
||||
|
||||
obj-m += mcoverlay.o
|
||||
|
||||
mcoverlay-y := copy_up.o dir.o inode.o readdir.o super.o
|
||||
|
||||
.PHONY: clean install modules
|
||||
|
||||
modules:
|
||||
$(MAKE) -C $(KDIR) M=$(PWD) SUBDIRS=$(PWD) ARCH=$(ARCH) modules
|
||||
|
||||
clean:
|
||||
$(RM) .*.cmd *.mod.c *.o *.ko* Module.symvers modules.order -r .tmp*
|
||||
|
||||
install:
|
||||
mkdir -p -m 755 $(KMODDIR)
|
||||
install -m 644 mcoverlay.ko $(KMODDIR)
|
||||
|
||||
416
executer/kernel/mcoverlayfs/linux-4.0.9/copy_up.c
Normal file
416
executer/kernel/mcoverlayfs/linux-4.0.9/copy_up.c
Normal file
@ -0,0 +1,416 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/splice.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/namei.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
|
||||
|
||||
int ovl_copy_xattr(struct dentry *old, struct dentry *new)
|
||||
{
|
||||
ssize_t list_size, size;
|
||||
char *buf, *name, *value;
|
||||
int error;
|
||||
|
||||
if (!old->d_inode->i_op->getxattr ||
|
||||
!new->d_inode->i_op->getxattr)
|
||||
return 0;
|
||||
|
||||
list_size = vfs_listxattr(old, NULL, 0);
|
||||
if (list_size <= 0) {
|
||||
if (list_size == -EOPNOTSUPP)
|
||||
return 0;
|
||||
return list_size;
|
||||
}
|
||||
|
||||
buf = kzalloc(list_size, GFP_KERNEL);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
|
||||
error = -ENOMEM;
|
||||
value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL);
|
||||
if (!value)
|
||||
goto out;
|
||||
|
||||
list_size = vfs_listxattr(old, buf, list_size);
|
||||
if (list_size <= 0) {
|
||||
error = list_size;
|
||||
goto out_free_value;
|
||||
}
|
||||
|
||||
for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
|
||||
size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX);
|
||||
if (size <= 0) {
|
||||
error = size;
|
||||
goto out_free_value;
|
||||
}
|
||||
error = vfs_setxattr(new, name, value, size, 0);
|
||||
if (error)
|
||||
goto out_free_value;
|
||||
}
|
||||
|
||||
out_free_value:
|
||||
kfree(value);
|
||||
out:
|
||||
kfree(buf);
|
||||
return error;
|
||||
}
|
||||
|
||||
static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
|
||||
{
|
||||
struct file *old_file;
|
||||
struct file *new_file;
|
||||
loff_t old_pos = 0;
|
||||
loff_t new_pos = 0;
|
||||
int error = 0;
|
||||
|
||||
if (len == 0)
|
||||
return 0;
|
||||
|
||||
old_file = ovl_path_open(old, O_RDONLY);
|
||||
if (IS_ERR(old_file))
|
||||
return PTR_ERR(old_file);
|
||||
|
||||
new_file = ovl_path_open(new, O_WRONLY);
|
||||
if (IS_ERR(new_file)) {
|
||||
error = PTR_ERR(new_file);
|
||||
goto out_fput;
|
||||
}
|
||||
|
||||
/* FIXME: copy up sparse files efficiently */
|
||||
while (len) {
|
||||
size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
|
||||
long bytes;
|
||||
|
||||
if (len < this_len)
|
||||
this_len = len;
|
||||
|
||||
if (signal_pending_state(TASK_KILLABLE, current)) {
|
||||
error = -EINTR;
|
||||
break;
|
||||
}
|
||||
|
||||
bytes = do_splice_direct(old_file, &old_pos,
|
||||
new_file, &new_pos,
|
||||
this_len, SPLICE_F_MOVE);
|
||||
if (bytes <= 0) {
|
||||
error = bytes;
|
||||
break;
|
||||
}
|
||||
WARN_ON(old_pos != new_pos);
|
||||
|
||||
len -= bytes;
|
||||
}
|
||||
|
||||
fput(new_file);
|
||||
out_fput:
|
||||
fput(old_file);
|
||||
return error;
|
||||
}
|
||||
|
||||
static char *ovl_read_symlink(struct dentry *realdentry)
|
||||
{
|
||||
int res;
|
||||
char *buf;
|
||||
struct inode *inode = realdentry->d_inode;
|
||||
mm_segment_t old_fs;
|
||||
|
||||
res = -EINVAL;
|
||||
if (!inode->i_op->readlink)
|
||||
goto err;
|
||||
|
||||
res = -ENOMEM;
|
||||
buf = (char *) __get_free_page(GFP_KERNEL);
|
||||
if (!buf)
|
||||
goto err;
|
||||
|
||||
old_fs = get_fs();
|
||||
set_fs(get_ds());
|
||||
/* The cast to a user pointer is valid due to the set_fs() */
|
||||
res = inode->i_op->readlink(realdentry,
|
||||
(char __user *)buf, PAGE_SIZE - 1);
|
||||
set_fs(old_fs);
|
||||
if (res < 0) {
|
||||
free_page((unsigned long) buf);
|
||||
goto err;
|
||||
}
|
||||
buf[res] = '\0';
|
||||
|
||||
return buf;
|
||||
|
||||
err:
|
||||
return ERR_PTR(res);
|
||||
}
|
||||
|
||||
static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
|
||||
{
|
||||
struct iattr attr = {
|
||||
.ia_valid =
|
||||
ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
|
||||
.ia_atime = stat->atime,
|
||||
.ia_mtime = stat->mtime,
|
||||
};
|
||||
|
||||
return notify_change(upperdentry, &attr, NULL);
|
||||
}
|
||||
|
||||
int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (!S_ISLNK(stat->mode)) {
|
||||
struct iattr attr = {
|
||||
.ia_valid = ATTR_MODE,
|
||||
.ia_mode = stat->mode,
|
||||
};
|
||||
err = notify_change(upperdentry, &attr, NULL);
|
||||
}
|
||||
if (!err) {
|
||||
struct iattr attr = {
|
||||
.ia_valid = ATTR_UID | ATTR_GID,
|
||||
.ia_uid = stat->uid,
|
||||
.ia_gid = stat->gid,
|
||||
};
|
||||
err = notify_change(upperdentry, &attr, NULL);
|
||||
}
|
||||
if (!err)
|
||||
ovl_set_timestamps(upperdentry, stat);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
|
||||
struct dentry *dentry, struct path *lowerpath,
|
||||
struct kstat *stat, struct iattr *attr,
|
||||
const char *link)
|
||||
{
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct dentry *newdentry = NULL;
|
||||
struct dentry *upper = NULL;
|
||||
umode_t mode = stat->mode;
|
||||
int err;
|
||||
|
||||
newdentry = ovl_lookup_temp(workdir, dentry);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out;
|
||||
|
||||
upper = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(upper);
|
||||
if (IS_ERR(upper))
|
||||
goto out1;
|
||||
|
||||
/* Can't properly set mode on creation because of the umask */
|
||||
stat->mode &= S_IFMT;
|
||||
err = ovl_create_real(wdir, newdentry, stat, link, NULL, true);
|
||||
stat->mode = mode;
|
||||
if (err)
|
||||
goto out2;
|
||||
|
||||
if (S_ISREG(stat->mode)) {
|
||||
struct path upperpath;
|
||||
ovl_path_upper(dentry, &upperpath);
|
||||
BUG_ON(upperpath.dentry != NULL);
|
||||
upperpath.dentry = newdentry;
|
||||
|
||||
err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
}
|
||||
|
||||
err = ovl_copy_xattr(lowerpath->dentry, newdentry);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
mutex_lock(&newdentry->d_inode->i_mutex);
|
||||
err = ovl_set_attr(newdentry, stat);
|
||||
if (!err && attr)
|
||||
err = notify_change(newdentry, attr, NULL);
|
||||
mutex_unlock(&newdentry->d_inode->i_mutex);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
ovl_dentry_update(dentry, newdentry);
|
||||
newdentry = NULL;
|
||||
|
||||
/*
|
||||
* Non-directores become opaque when copied up.
|
||||
*/
|
||||
if (!S_ISDIR(stat->mode))
|
||||
ovl_dentry_set_opaque(dentry, true);
|
||||
out2:
|
||||
dput(upper);
|
||||
out1:
|
||||
dput(newdentry);
|
||||
out:
|
||||
return err;
|
||||
|
||||
out_cleanup:
|
||||
ovl_cleanup(wdir, newdentry);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy up a single dentry
|
||||
*
|
||||
* Directory renames only allowed on "pure upper" (already created on
|
||||
* upper filesystem, never copied up). Directories which are on lower or
|
||||
* are merged may not be renamed. For these -EXDEV is returned and
|
||||
* userspace has to deal with it. This means, when copying up a
|
||||
* directory we can rely on it and ancestors being stable.
|
||||
*
|
||||
* Non-directory renames start with copy up of source if necessary. The
|
||||
* actual rename will only proceed once the copy up was successful. Copy
|
||||
* up uses upper parent i_mutex for exclusion. Since rename can change
|
||||
* d_parent it is possible that the copy up will lock the old parent. At
|
||||
* that point the file will have already been copied up anyway.
|
||||
*/
|
||||
int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
|
||||
struct path *lowerpath, struct kstat *stat,
|
||||
struct iattr *attr)
|
||||
{
|
||||
struct dentry *workdir = ovl_workdir(dentry);
|
||||
int err;
|
||||
struct kstat pstat;
|
||||
struct path parentpath;
|
||||
struct dentry *upperdir;
|
||||
struct dentry *upperdentry;
|
||||
const struct cred *old_cred;
|
||||
struct cred *override_cred;
|
||||
char *link = NULL;
|
||||
|
||||
if (WARN_ON(!workdir))
|
||||
return -EROFS;
|
||||
|
||||
ovl_path_upper(parent, &parentpath);
|
||||
upperdir = parentpath.dentry;
|
||||
|
||||
err = vfs_getattr(&parentpath, &pstat);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (S_ISLNK(stat->mode)) {
|
||||
link = ovl_read_symlink(lowerpath->dentry);
|
||||
if (IS_ERR(link))
|
||||
return PTR_ERR(link);
|
||||
}
|
||||
|
||||
err = -ENOMEM;
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
goto out_free_link;
|
||||
|
||||
override_cred->fsuid = stat->uid;
|
||||
override_cred->fsgid = stat->gid;
|
||||
/*
|
||||
* CAP_SYS_ADMIN for copying up extended attributes
|
||||
* CAP_DAC_OVERRIDE for create
|
||||
* CAP_FOWNER for chmod, timestamp update
|
||||
* CAP_FSETID for chmod
|
||||
* CAP_CHOWN for chown
|
||||
* CAP_MKNOD for mknod
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
cap_raise(override_cred->cap_effective, CAP_FOWNER);
|
||||
cap_raise(override_cred->cap_effective, CAP_FSETID);
|
||||
cap_raise(override_cred->cap_effective, CAP_CHOWN);
|
||||
cap_raise(override_cred->cap_effective, CAP_MKNOD);
|
||||
old_cred = override_creds(override_cred);
|
||||
|
||||
err = -EIO;
|
||||
if (lock_rename(workdir, upperdir) != NULL) {
|
||||
pr_err("overlayfs: failed to lock workdir+upperdir\n");
|
||||
goto out_unlock;
|
||||
}
|
||||
upperdentry = ovl_dentry_upper(dentry);
|
||||
if (upperdentry) {
|
||||
unlock_rename(workdir, upperdir);
|
||||
err = 0;
|
||||
/* Raced with another copy-up? Do the setattr here */
|
||||
if (attr) {
|
||||
mutex_lock(&upperdentry->d_inode->i_mutex);
|
||||
err = notify_change(upperdentry, attr, NULL);
|
||||
mutex_unlock(&upperdentry->d_inode->i_mutex);
|
||||
}
|
||||
goto out_put_cred;
|
||||
}
|
||||
|
||||
err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
|
||||
stat, attr, link);
|
||||
if (!err) {
|
||||
/* Restore timestamps on parent (best effort) */
|
||||
ovl_set_timestamps(upperdir, &pstat);
|
||||
}
|
||||
out_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
out_put_cred:
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
|
||||
out_free_link:
|
||||
if (link)
|
||||
free_page((unsigned long) link);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int ovl_copy_up(struct dentry *dentry)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = 0;
|
||||
while (!err) {
|
||||
struct dentry *next;
|
||||
struct dentry *parent;
|
||||
struct path lowerpath;
|
||||
struct kstat stat;
|
||||
enum ovl_path_type type = ovl_path_type(dentry);
|
||||
|
||||
if (OVL_TYPE_UPPER(type))
|
||||
break;
|
||||
|
||||
next = dget(dentry);
|
||||
/* find the topmost dentry not yet copied up */
|
||||
for (;;) {
|
||||
parent = dget_parent(next);
|
||||
|
||||
type = ovl_path_type(parent);
|
||||
if (OVL_TYPE_UPPER(type))
|
||||
break;
|
||||
|
||||
dput(next);
|
||||
next = parent;
|
||||
}
|
||||
|
||||
ovl_path_lower(next, &lowerpath);
|
||||
err = vfs_getattr(&lowerpath, &stat);
|
||||
if (!err)
|
||||
err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL);
|
||||
|
||||
dput(parent);
|
||||
dput(next);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
951
executer/kernel/mcoverlayfs/linux-4.0.9/dir.c
Normal file
951
executer/kernel/mcoverlayfs/linux-4.0.9/dir.c
Normal file
@ -0,0 +1,951 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/cred.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
|
||||
{
|
||||
int err;
|
||||
|
||||
dget(wdentry);
|
||||
if (d_is_dir(wdentry))
|
||||
err = ovl_do_rmdir(wdir, wdentry);
|
||||
else
|
||||
err = ovl_do_unlink(wdir, wdentry);
|
||||
dput(wdentry);
|
||||
|
||||
if (err) {
|
||||
pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
|
||||
wdentry, err);
|
||||
}
|
||||
}
|
||||
|
||||
struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
|
||||
{
|
||||
struct dentry *temp;
|
||||
char name[20];
|
||||
|
||||
snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry);
|
||||
|
||||
temp = lookup_one_len(name, workdir, strlen(name));
|
||||
if (!IS_ERR(temp) && temp->d_inode) {
|
||||
pr_err("overlayfs: workdir/%s already exists\n", name);
|
||||
dput(temp);
|
||||
temp = ERR_PTR(-EIO);
|
||||
}
|
||||
|
||||
return temp;
|
||||
}
|
||||
|
||||
/* caller holds i_mutex on workdir */
|
||||
static struct dentry *ovl_whiteout(struct dentry *workdir,
|
||||
struct dentry *dentry)
|
||||
{
|
||||
int err;
|
||||
struct dentry *whiteout;
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
|
||||
whiteout = ovl_lookup_temp(workdir, dentry);
|
||||
if (IS_ERR(whiteout))
|
||||
return whiteout;
|
||||
|
||||
err = ovl_do_whiteout(wdir, whiteout);
|
||||
if (err) {
|
||||
dput(whiteout);
|
||||
whiteout = ERR_PTR(err);
|
||||
}
|
||||
|
||||
return whiteout;
|
||||
}
|
||||
|
||||
int ovl_create_real(struct inode *dir, struct dentry *newdentry,
|
||||
struct kstat *stat, const char *link,
|
||||
struct dentry *hardlink, bool debug)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (newdentry->d_inode)
|
||||
return -ESTALE;
|
||||
|
||||
if (hardlink) {
|
||||
err = ovl_do_link(hardlink, dir, newdentry, debug);
|
||||
} else {
|
||||
switch (stat->mode & S_IFMT) {
|
||||
case S_IFREG:
|
||||
err = ovl_do_create(dir, newdentry, stat->mode, debug);
|
||||
break;
|
||||
|
||||
case S_IFDIR:
|
||||
err = ovl_do_mkdir(dir, newdentry, stat->mode, debug);
|
||||
break;
|
||||
|
||||
case S_IFCHR:
|
||||
case S_IFBLK:
|
||||
case S_IFIFO:
|
||||
case S_IFSOCK:
|
||||
err = ovl_do_mknod(dir, newdentry,
|
||||
stat->mode, stat->rdev, debug);
|
||||
break;
|
||||
|
||||
case S_IFLNK:
|
||||
err = ovl_do_symlink(dir, newdentry, link, debug);
|
||||
break;
|
||||
|
||||
default:
|
||||
err = -EPERM;
|
||||
}
|
||||
}
|
||||
if (!err && WARN_ON(!newdentry->d_inode)) {
|
||||
/*
|
||||
* Not quite sure if non-instantiated dentry is legal or not.
|
||||
* VFS doesn't seem to care so check and warn here.
|
||||
*/
|
||||
err = -ENOENT;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_set_opaque(struct dentry *upperdentry)
|
||||
{
|
||||
return ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0);
|
||||
}
|
||||
|
||||
static void ovl_remove_opaque(struct dentry *upperdentry)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = ovl_do_removexattr(upperdentry, OVL_XATTR_OPAQUE);
|
||||
if (err) {
|
||||
pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n",
|
||||
upperdentry->d_name.name, err);
|
||||
}
|
||||
}
|
||||
|
||||
static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat)
|
||||
{
|
||||
int err;
|
||||
enum ovl_path_type type;
|
||||
struct path realpath;
|
||||
|
||||
type = ovl_path_real(dentry, &realpath);
|
||||
err = vfs_getattr(&realpath, stat);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
stat->dev = dentry->d_sb->s_dev;
|
||||
stat->ino = dentry->d_inode->i_ino;
|
||||
|
||||
/*
|
||||
* It's probably not worth it to count subdirs to get the
|
||||
* correct link count. nlink=1 seems to pacify 'find' and
|
||||
* other utilities.
|
||||
*/
|
||||
if (OVL_TYPE_MERGE(type))
|
||||
stat->nlink = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
|
||||
struct kstat *stat, const char *link,
|
||||
struct dentry *hardlink)
|
||||
{
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct dentry *newdentry;
|
||||
int err;
|
||||
|
||||
mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT);
|
||||
newdentry = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out_unlock;
|
||||
err = ovl_create_real(udir, newdentry, stat, link, hardlink, false);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
|
||||
ovl_dentry_version_inc(dentry->d_parent);
|
||||
ovl_dentry_update(dentry, newdentry);
|
||||
ovl_copyattr(newdentry->d_inode, inode);
|
||||
d_instantiate(dentry, inode);
|
||||
newdentry = NULL;
|
||||
out_dput:
|
||||
dput(newdentry);
|
||||
out_unlock:
|
||||
mutex_unlock(&udir->i_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_lock_rename_workdir(struct dentry *workdir,
|
||||
struct dentry *upperdir)
|
||||
{
|
||||
/* Workdir should not be the same as upperdir */
|
||||
if (workdir == upperdir)
|
||||
goto err;
|
||||
|
||||
/* Workdir should not be subdir of upperdir and vice versa */
|
||||
if (lock_rename(workdir, upperdir) != NULL)
|
||||
goto err_unlock;
|
||||
|
||||
return 0;
|
||||
|
||||
err_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
err:
|
||||
pr_err("overlayfs: failed to lock workdir+upperdir\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static struct dentry *ovl_clear_empty(struct dentry *dentry,
|
||||
struct list_head *list)
|
||||
{
|
||||
struct dentry *workdir = ovl_workdir(dentry);
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct path upperpath;
|
||||
struct dentry *upper;
|
||||
struct dentry *opaquedir;
|
||||
struct kstat stat;
|
||||
int err;
|
||||
|
||||
if (WARN_ON(!workdir))
|
||||
return ERR_PTR(-EROFS);
|
||||
|
||||
err = ovl_lock_rename_workdir(workdir, upperdir);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
ovl_path_upper(dentry, &upperpath);
|
||||
err = vfs_getattr(&upperpath, &stat);
|
||||
if (err)
|
||||
goto out_unlock;
|
||||
|
||||
err = -ESTALE;
|
||||
if (!S_ISDIR(stat.mode))
|
||||
goto out_unlock;
|
||||
upper = upperpath.dentry;
|
||||
if (upper->d_parent->d_inode != udir)
|
||||
goto out_unlock;
|
||||
|
||||
opaquedir = ovl_lookup_temp(workdir, dentry);
|
||||
err = PTR_ERR(opaquedir);
|
||||
if (IS_ERR(opaquedir))
|
||||
goto out_unlock;
|
||||
|
||||
err = ovl_create_real(wdir, opaquedir, &stat, NULL, NULL, true);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
|
||||
err = ovl_copy_xattr(upper, opaquedir);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_set_opaque(opaquedir);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
mutex_lock(&opaquedir->d_inode->i_mutex);
|
||||
err = ovl_set_attr(opaquedir, &stat);
|
||||
mutex_unlock(&opaquedir->d_inode->i_mutex);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
ovl_cleanup_whiteouts(upper, list);
|
||||
ovl_cleanup(wdir, upper);
|
||||
unlock_rename(workdir, upperdir);
|
||||
|
||||
/* dentry's upper doesn't match now, get rid of it */
|
||||
d_drop(dentry);
|
||||
|
||||
return opaquedir;
|
||||
|
||||
out_cleanup:
|
||||
ovl_cleanup(wdir, opaquedir);
|
||||
out_dput:
|
||||
dput(opaquedir);
|
||||
out_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
out:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
|
||||
{
|
||||
int err;
|
||||
struct dentry *ret = NULL;
|
||||
LIST_HEAD(list);
|
||||
|
||||
err = ovl_check_empty_dir(dentry, &list);
|
||||
if (err)
|
||||
ret = ERR_PTR(err);
|
||||
else {
|
||||
/*
|
||||
* If no upperdentry then skip clearing whiteouts.
|
||||
*
|
||||
* Can race with copy-up, since we don't hold the upperdir
|
||||
* mutex. Doesn't matter, since copy-up can't create a
|
||||
* non-empty directory from an empty one.
|
||||
*/
|
||||
if (ovl_dentry_upper(dentry))
|
||||
ret = ovl_clear_empty(dentry, &list);
|
||||
}
|
||||
|
||||
ovl_cache_free(&list);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
|
||||
struct kstat *stat, const char *link,
|
||||
struct dentry *hardlink)
|
||||
{
|
||||
struct dentry *workdir = ovl_workdir(dentry);
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct dentry *upper;
|
||||
struct dentry *newdentry;
|
||||
int err;
|
||||
|
||||
if (WARN_ON(!workdir))
|
||||
return -EROFS;
|
||||
|
||||
err = ovl_lock_rename_workdir(workdir, upperdir);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
newdentry = ovl_lookup_temp(workdir, dentry);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out_unlock;
|
||||
|
||||
upper = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(upper);
|
||||
if (IS_ERR(upper))
|
||||
goto out_dput;
|
||||
|
||||
err = ovl_create_real(wdir, newdentry, stat, link, hardlink, true);
|
||||
if (err)
|
||||
goto out_dput2;
|
||||
|
||||
if (S_ISDIR(stat->mode)) {
|
||||
err = ovl_set_opaque(newdentry);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_do_rename(wdir, newdentry, udir, upper,
|
||||
RENAME_EXCHANGE);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
ovl_cleanup(wdir, upper);
|
||||
} else {
|
||||
err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
}
|
||||
ovl_dentry_version_inc(dentry->d_parent);
|
||||
ovl_dentry_update(dentry, newdentry);
|
||||
ovl_copyattr(newdentry->d_inode, inode);
|
||||
d_instantiate(dentry, inode);
|
||||
newdentry = NULL;
|
||||
out_dput2:
|
||||
dput(upper);
|
||||
out_dput:
|
||||
dput(newdentry);
|
||||
out_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
out:
|
||||
return err;
|
||||
|
||||
out_cleanup:
|
||||
ovl_cleanup(wdir, newdentry);
|
||||
goto out_dput2;
|
||||
}
|
||||
|
||||
static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev,
|
||||
const char *link, struct dentry *hardlink)
|
||||
{
|
||||
int err;
|
||||
struct inode *inode;
|
||||
struct kstat stat = {
|
||||
.mode = mode,
|
||||
.rdev = rdev,
|
||||
};
|
||||
|
||||
err = -ENOMEM;
|
||||
inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata);
|
||||
if (!inode)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(dentry->d_parent);
|
||||
if (err)
|
||||
goto out_iput;
|
||||
|
||||
if (!ovl_dentry_is_opaque(dentry)) {
|
||||
err = ovl_create_upper(dentry, inode, &stat, link, hardlink);
|
||||
} else {
|
||||
const struct cred *old_cred;
|
||||
struct cred *override_cred;
|
||||
|
||||
err = -ENOMEM;
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
goto out_iput;
|
||||
|
||||
/*
|
||||
* CAP_SYS_ADMIN for setting opaque xattr
|
||||
* CAP_DAC_OVERRIDE for create in workdir, rename
|
||||
* CAP_FOWNER for removing whiteout from sticky dir
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
cap_raise(override_cred->cap_effective, CAP_FOWNER);
|
||||
old_cred = override_creds(override_cred);
|
||||
|
||||
err = ovl_create_over_whiteout(dentry, inode, &stat, link,
|
||||
hardlink);
|
||||
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
}
|
||||
|
||||
if (!err)
|
||||
inode = NULL;
|
||||
out_iput:
|
||||
iput(inode);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
|
||||
const char *link)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (!err) {
|
||||
err = ovl_create_or_link(dentry, mode, rdev, link, NULL);
|
||||
ovl_drop_write(dentry);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
bool excl)
|
||||
{
|
||||
return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
|
||||
}
|
||||
|
||||
static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
|
||||
{
|
||||
return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
|
||||
}
|
||||
|
||||
static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
dev_t rdev)
|
||||
{
|
||||
/* Don't allow creation of "whiteout" on overlay */
|
||||
if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
|
||||
return -EPERM;
|
||||
|
||||
return ovl_create_object(dentry, mode, rdev, NULL);
|
||||
}
|
||||
|
||||
static int ovl_symlink(struct inode *dir, struct dentry *dentry,
|
||||
const char *link)
|
||||
{
|
||||
return ovl_create_object(dentry, S_IFLNK, 0, link);
|
||||
}
|
||||
|
||||
static int ovl_link(struct dentry *old, struct inode *newdir,
|
||||
struct dentry *new)
|
||||
{
|
||||
int err;
|
||||
struct dentry *upper;
|
||||
|
||||
err = ovl_want_write(old);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(old);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
upper = ovl_dentry_upper(old);
|
||||
err = ovl_create_or_link(new, upper->d_inode->i_mode, 0, NULL, upper);
|
||||
|
||||
out_drop_write:
|
||||
ovl_drop_write(old);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
|
||||
{
|
||||
struct dentry *workdir = ovl_workdir(dentry);
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct dentry *whiteout;
|
||||
struct dentry *upper;
|
||||
struct dentry *opaquedir = NULL;
|
||||
int err;
|
||||
|
||||
if (WARN_ON(!workdir))
|
||||
return -EROFS;
|
||||
|
||||
if (is_dir) {
|
||||
if (OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) {
|
||||
opaquedir = ovl_check_empty_and_clear(dentry);
|
||||
err = PTR_ERR(opaquedir);
|
||||
if (IS_ERR(opaquedir))
|
||||
goto out;
|
||||
} else {
|
||||
LIST_HEAD(list);
|
||||
|
||||
/*
|
||||
* When removing an empty opaque directory, then it
|
||||
* makes no sense to replace it with an exact replica of
|
||||
* itself. But emptiness still needs to be checked.
|
||||
*/
|
||||
err = ovl_check_empty_dir(dentry, &list);
|
||||
ovl_cache_free(&list);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
err = ovl_lock_rename_workdir(workdir, upperdir);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
|
||||
whiteout = ovl_whiteout(workdir, dentry);
|
||||
err = PTR_ERR(whiteout);
|
||||
if (IS_ERR(whiteout))
|
||||
goto out_unlock;
|
||||
|
||||
upper = ovl_dentry_upper(dentry);
|
||||
if (!upper) {
|
||||
upper = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(upper);
|
||||
if (IS_ERR(upper))
|
||||
goto kill_whiteout;
|
||||
|
||||
err = ovl_do_rename(wdir, whiteout, udir, upper, 0);
|
||||
dput(upper);
|
||||
if (err)
|
||||
goto kill_whiteout;
|
||||
} else {
|
||||
int flags = 0;
|
||||
|
||||
if (opaquedir)
|
||||
upper = opaquedir;
|
||||
err = -ESTALE;
|
||||
if (upper->d_parent != upperdir)
|
||||
goto kill_whiteout;
|
||||
|
||||
if (is_dir)
|
||||
flags |= RENAME_EXCHANGE;
|
||||
|
||||
err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
|
||||
if (err)
|
||||
goto kill_whiteout;
|
||||
|
||||
if (is_dir)
|
||||
ovl_cleanup(wdir, upper);
|
||||
}
|
||||
ovl_dentry_version_inc(dentry->d_parent);
|
||||
out_d_drop:
|
||||
d_drop(dentry);
|
||||
dput(whiteout);
|
||||
out_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
out_dput:
|
||||
dput(opaquedir);
|
||||
out:
|
||||
return err;
|
||||
|
||||
kill_whiteout:
|
||||
ovl_cleanup(wdir, whiteout);
|
||||
goto out_d_drop;
|
||||
}
|
||||
|
||||
static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
|
||||
{
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *dir = upperdir->d_inode;
|
||||
struct dentry *upper = ovl_dentry_upper(dentry);
|
||||
int err;
|
||||
|
||||
mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
|
||||
err = -ESTALE;
|
||||
if (upper->d_parent == upperdir) {
|
||||
/* Don't let d_delete() think it can reset d_inode */
|
||||
dget(upper);
|
||||
if (is_dir)
|
||||
err = vfs_rmdir(dir, upper);
|
||||
else
|
||||
err = vfs_unlink(dir, upper, NULL);
|
||||
dput(upper);
|
||||
ovl_dentry_version_inc(dentry->d_parent);
|
||||
}
|
||||
|
||||
/*
|
||||
* Keeping this dentry hashed would mean having to release
|
||||
* upperpath/lowerpath, which could only be done if we are the
|
||||
* sole user of this dentry. Too tricky... Just unhash for
|
||||
* now.
|
||||
*/
|
||||
d_drop(dentry);
|
||||
mutex_unlock(&dir->i_mutex);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_check_sticky(struct dentry *dentry)
|
||||
{
|
||||
struct inode *dir = ovl_dentry_real(dentry->d_parent)->d_inode;
|
||||
struct inode *inode = ovl_dentry_real(dentry)->d_inode;
|
||||
|
||||
if (check_sticky(dir, inode))
|
||||
return -EPERM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
|
||||
{
|
||||
enum ovl_path_type type;
|
||||
int err;
|
||||
|
||||
err = ovl_check_sticky(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(dentry->d_parent);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
type = ovl_path_type(dentry);
|
||||
if (OVL_TYPE_PURE_UPPER(type)) {
|
||||
err = ovl_remove_upper(dentry, is_dir);
|
||||
} else {
|
||||
const struct cred *old_cred;
|
||||
struct cred *override_cred;
|
||||
|
||||
err = -ENOMEM;
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
goto out_drop_write;
|
||||
|
||||
/*
|
||||
* CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
|
||||
* CAP_DAC_OVERRIDE for create in workdir, rename
|
||||
* CAP_FOWNER for removing whiteout from sticky dir
|
||||
* CAP_FSETID for chmod of opaque dir
|
||||
* CAP_CHOWN for chown of opaque dir
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
cap_raise(override_cred->cap_effective, CAP_FOWNER);
|
||||
cap_raise(override_cred->cap_effective, CAP_FSETID);
|
||||
cap_raise(override_cred->cap_effective, CAP_CHOWN);
|
||||
old_cred = override_creds(override_cred);
|
||||
|
||||
err = ovl_remove_and_whiteout(dentry, is_dir);
|
||||
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
}
|
||||
out_drop_write:
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_unlink(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
return ovl_do_remove(dentry, false);
|
||||
}
|
||||
|
||||
static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
return ovl_do_remove(dentry, true);
|
||||
}
|
||||
|
||||
static int ovl_rename2(struct inode *olddir, struct dentry *old,
|
||||
struct inode *newdir, struct dentry *new,
|
||||
unsigned int flags)
|
||||
{
|
||||
int err;
|
||||
enum ovl_path_type old_type;
|
||||
enum ovl_path_type new_type;
|
||||
struct dentry *old_upperdir;
|
||||
struct dentry *new_upperdir;
|
||||
struct dentry *olddentry;
|
||||
struct dentry *newdentry;
|
||||
struct dentry *trap;
|
||||
bool old_opaque;
|
||||
bool new_opaque;
|
||||
bool new_create = false;
|
||||
bool cleanup_whiteout = false;
|
||||
bool overwrite = !(flags & RENAME_EXCHANGE);
|
||||
bool is_dir = d_is_dir(old);
|
||||
bool new_is_dir = false;
|
||||
struct dentry *opaquedir = NULL;
|
||||
const struct cred *old_cred = NULL;
|
||||
struct cred *override_cred = NULL;
|
||||
|
||||
err = -EINVAL;
|
||||
if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
|
||||
goto out;
|
||||
|
||||
flags &= ~RENAME_NOREPLACE;
|
||||
|
||||
err = ovl_check_sticky(old);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
/* Don't copy up directory trees */
|
||||
old_type = ovl_path_type(old);
|
||||
err = -EXDEV;
|
||||
if (OVL_TYPE_MERGE_OR_LOWER(old_type) && is_dir)
|
||||
goto out;
|
||||
|
||||
if (new->d_inode) {
|
||||
err = ovl_check_sticky(new);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (d_is_dir(new))
|
||||
new_is_dir = true;
|
||||
|
||||
new_type = ovl_path_type(new);
|
||||
err = -EXDEV;
|
||||
if (!overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir)
|
||||
goto out;
|
||||
|
||||
err = 0;
|
||||
if (!OVL_TYPE_UPPER(new_type) && !OVL_TYPE_UPPER(old_type)) {
|
||||
if (ovl_dentry_lower(old)->d_inode ==
|
||||
ovl_dentry_lower(new)->d_inode)
|
||||
goto out;
|
||||
}
|
||||
if (OVL_TYPE_UPPER(new_type) && OVL_TYPE_UPPER(old_type)) {
|
||||
if (ovl_dentry_upper(old)->d_inode ==
|
||||
ovl_dentry_upper(new)->d_inode)
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
if (ovl_dentry_is_opaque(new))
|
||||
new_type = __OVL_PATH_UPPER;
|
||||
else
|
||||
new_type = __OVL_PATH_UPPER | __OVL_PATH_PURE;
|
||||
}
|
||||
|
||||
err = ovl_want_write(old);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(old);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
err = ovl_copy_up(new->d_parent);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
if (!overwrite) {
|
||||
err = ovl_copy_up(new);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
}
|
||||
|
||||
old_opaque = !OVL_TYPE_PURE_UPPER(old_type);
|
||||
new_opaque = !OVL_TYPE_PURE_UPPER(new_type);
|
||||
|
||||
if (old_opaque || new_opaque) {
|
||||
err = -ENOMEM;
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
goto out_drop_write;
|
||||
|
||||
/*
|
||||
* CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
|
||||
* CAP_DAC_OVERRIDE for create in workdir
|
||||
* CAP_FOWNER for removing whiteout from sticky dir
|
||||
* CAP_FSETID for chmod of opaque dir
|
||||
* CAP_CHOWN for chown of opaque dir
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
cap_raise(override_cred->cap_effective, CAP_FOWNER);
|
||||
cap_raise(override_cred->cap_effective, CAP_FSETID);
|
||||
cap_raise(override_cred->cap_effective, CAP_CHOWN);
|
||||
old_cred = override_creds(override_cred);
|
||||
}
|
||||
|
||||
if (overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir) {
|
||||
opaquedir = ovl_check_empty_and_clear(new);
|
||||
err = PTR_ERR(opaquedir);
|
||||
if (IS_ERR(opaquedir)) {
|
||||
opaquedir = NULL;
|
||||
goto out_revert_creds;
|
||||
}
|
||||
}
|
||||
|
||||
if (overwrite) {
|
||||
if (old_opaque) {
|
||||
if (new->d_inode || !new_opaque) {
|
||||
/* Whiteout source */
|
||||
flags |= RENAME_WHITEOUT;
|
||||
} else {
|
||||
/* Switch whiteouts */
|
||||
flags |= RENAME_EXCHANGE;
|
||||
}
|
||||
} else if (is_dir && !new->d_inode && new_opaque) {
|
||||
flags |= RENAME_EXCHANGE;
|
||||
cleanup_whiteout = true;
|
||||
}
|
||||
}
|
||||
|
||||
old_upperdir = ovl_dentry_upper(old->d_parent);
|
||||
new_upperdir = ovl_dentry_upper(new->d_parent);
|
||||
|
||||
trap = lock_rename(new_upperdir, old_upperdir);
|
||||
|
||||
olddentry = ovl_dentry_upper(old);
|
||||
newdentry = ovl_dentry_upper(new);
|
||||
if (newdentry) {
|
||||
if (opaquedir) {
|
||||
newdentry = opaquedir;
|
||||
opaquedir = NULL;
|
||||
} else {
|
||||
dget(newdentry);
|
||||
}
|
||||
} else {
|
||||
new_create = true;
|
||||
newdentry = lookup_one_len(new->d_name.name, new_upperdir,
|
||||
new->d_name.len);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
err = -ESTALE;
|
||||
if (olddentry->d_parent != old_upperdir)
|
||||
goto out_dput;
|
||||
if (newdentry->d_parent != new_upperdir)
|
||||
goto out_dput;
|
||||
if (olddentry == trap)
|
||||
goto out_dput;
|
||||
if (newdentry == trap)
|
||||
goto out_dput;
|
||||
|
||||
if (is_dir && !old_opaque && new_opaque) {
|
||||
err = ovl_set_opaque(olddentry);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
}
|
||||
if (!overwrite && new_is_dir && old_opaque && !new_opaque) {
|
||||
err = ovl_set_opaque(newdentry);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
if (old_opaque || new_opaque) {
|
||||
err = ovl_do_rename(old_upperdir->d_inode, olddentry,
|
||||
new_upperdir->d_inode, newdentry,
|
||||
flags);
|
||||
} else {
|
||||
/* No debug for the plain case */
|
||||
BUG_ON(flags & ~RENAME_EXCHANGE);
|
||||
err = vfs_rename(old_upperdir->d_inode, olddentry,
|
||||
new_upperdir->d_inode, newdentry,
|
||||
NULL, flags);
|
||||
}
|
||||
|
||||
if (err) {
|
||||
if (is_dir && !old_opaque && new_opaque)
|
||||
ovl_remove_opaque(olddentry);
|
||||
if (!overwrite && new_is_dir && old_opaque && !new_opaque)
|
||||
ovl_remove_opaque(newdentry);
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
if (is_dir && old_opaque && !new_opaque)
|
||||
ovl_remove_opaque(olddentry);
|
||||
if (!overwrite && new_is_dir && !old_opaque && new_opaque)
|
||||
ovl_remove_opaque(newdentry);
|
||||
|
||||
if (old_opaque != new_opaque) {
|
||||
ovl_dentry_set_opaque(old, new_opaque);
|
||||
if (!overwrite)
|
||||
ovl_dentry_set_opaque(new, old_opaque);
|
||||
}
|
||||
|
||||
if (cleanup_whiteout)
|
||||
ovl_cleanup(old_upperdir->d_inode, newdentry);
|
||||
|
||||
ovl_dentry_version_inc(old->d_parent);
|
||||
ovl_dentry_version_inc(new->d_parent);
|
||||
|
||||
out_dput:
|
||||
dput(newdentry);
|
||||
out_unlock:
|
||||
unlock_rename(new_upperdir, old_upperdir);
|
||||
out_revert_creds:
|
||||
if (old_opaque || new_opaque) {
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
}
|
||||
out_drop_write:
|
||||
ovl_drop_write(old);
|
||||
out:
|
||||
dput(opaquedir);
|
||||
return err;
|
||||
}
|
||||
|
||||
const struct inode_operations ovl_dir_inode_operations = {
|
||||
.lookup = ovl_lookup,
|
||||
.mkdir = ovl_mkdir,
|
||||
.symlink = ovl_symlink,
|
||||
.unlink = ovl_unlink,
|
||||
.rmdir = ovl_rmdir,
|
||||
.rename2 = ovl_rename2,
|
||||
.link = ovl_link,
|
||||
.setattr = ovl_setattr,
|
||||
.create = ovl_create,
|
||||
.mknod = ovl_mknod,
|
||||
.permission = ovl_permission,
|
||||
.getattr = ovl_dir_getattr,
|
||||
.setxattr = ovl_setxattr,
|
||||
.getxattr = ovl_getxattr,
|
||||
.listxattr = ovl_listxattr,
|
||||
.removexattr = ovl_removexattr,
|
||||
};
|
||||
438
executer/kernel/mcoverlayfs/linux-4.0.9/inode.c
Normal file
438
executer/kernel/mcoverlayfs/linux-4.0.9/inode.c
Normal file
@ -0,0 +1,438 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/xattr.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr,
|
||||
bool no_data)
|
||||
{
|
||||
int err;
|
||||
struct dentry *parent;
|
||||
struct kstat stat;
|
||||
struct path lowerpath;
|
||||
|
||||
parent = dget_parent(dentry);
|
||||
err = ovl_copy_up(parent);
|
||||
if (err)
|
||||
goto out_dput_parent;
|
||||
|
||||
ovl_path_lower(dentry, &lowerpath);
|
||||
err = vfs_getattr(&lowerpath, &stat);
|
||||
if (err)
|
||||
goto out_dput_parent;
|
||||
|
||||
if (no_data)
|
||||
stat.size = 0;
|
||||
|
||||
err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr);
|
||||
|
||||
out_dput_parent:
|
||||
dput(parent);
|
||||
return err;
|
||||
}
|
||||
|
||||
int ovl_setattr(struct dentry *dentry, struct iattr *attr)
|
||||
{
|
||||
int err;
|
||||
struct dentry *upperdentry;
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
upperdentry = ovl_dentry_upper(dentry);
|
||||
if (upperdentry) {
|
||||
mutex_lock(&upperdentry->d_inode->i_mutex);
|
||||
err = notify_change(upperdentry, attr, NULL);
|
||||
mutex_unlock(&upperdentry->d_inode->i_mutex);
|
||||
} else {
|
||||
err = ovl_copy_up_last(dentry, attr, false);
|
||||
}
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat)
|
||||
{
|
||||
struct path realpath;
|
||||
|
||||
ovl_path_real(dentry, &realpath);
|
||||
return vfs_getattr(&realpath, stat);
|
||||
}
|
||||
|
||||
int ovl_permission(struct inode *inode, int mask)
|
||||
{
|
||||
struct ovl_entry *oe;
|
||||
struct dentry *alias = NULL;
|
||||
struct inode *realinode;
|
||||
struct dentry *realdentry;
|
||||
bool is_upper;
|
||||
int err;
|
||||
|
||||
if (S_ISDIR(inode->i_mode)) {
|
||||
oe = inode->i_private;
|
||||
} else if (mask & MAY_NOT_BLOCK) {
|
||||
return -ECHILD;
|
||||
} else {
|
||||
/*
|
||||
* For non-directories find an alias and get the info
|
||||
* from there.
|
||||
*/
|
||||
alias = d_find_any_alias(inode);
|
||||
if (WARN_ON(!alias))
|
||||
return -ENOENT;
|
||||
|
||||
oe = alias->d_fsdata;
|
||||
}
|
||||
|
||||
realdentry = ovl_entry_real(oe, &is_upper);
|
||||
|
||||
/* Careful in RCU walk mode */
|
||||
realinode = ACCESS_ONCE(realdentry->d_inode);
|
||||
if (!realinode) {
|
||||
WARN_ON(!(mask & MAY_NOT_BLOCK));
|
||||
err = -ENOENT;
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
if (mask & MAY_WRITE) {
|
||||
umode_t mode = realinode->i_mode;
|
||||
|
||||
/*
|
||||
* Writes will always be redirected to upper layer, so
|
||||
* ignore lower layer being read-only.
|
||||
*
|
||||
* If the overlay itself is read-only then proceed
|
||||
* with the permission check, don't return EROFS.
|
||||
* This will only happen if this is the lower layer of
|
||||
* another overlayfs.
|
||||
*
|
||||
* If upper fs becomes read-only after the overlay was
|
||||
* constructed return EROFS to prevent modification of
|
||||
* upper layer.
|
||||
*/
|
||||
err = -EROFS;
|
||||
if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) &&
|
||||
(S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
err = __inode_permission(realinode, mask);
|
||||
out_dput:
|
||||
dput(alias);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
struct ovl_link_data {
|
||||
struct dentry *realdentry;
|
||||
void *cookie;
|
||||
};
|
||||
|
||||
static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
|
||||
{
|
||||
void *ret;
|
||||
struct dentry *realdentry;
|
||||
struct inode *realinode;
|
||||
|
||||
realdentry = ovl_dentry_real(dentry);
|
||||
realinode = realdentry->d_inode;
|
||||
|
||||
if (WARN_ON(!realinode->i_op->follow_link))
|
||||
return ERR_PTR(-EPERM);
|
||||
|
||||
ret = realinode->i_op->follow_link(realdentry, nd);
|
||||
if (IS_ERR(ret))
|
||||
return ret;
|
||||
|
||||
if (realinode->i_op->put_link) {
|
||||
struct ovl_link_data *data;
|
||||
|
||||
data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
|
||||
if (!data) {
|
||||
realinode->i_op->put_link(realdentry, nd, ret);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
data->realdentry = realdentry;
|
||||
data->cookie = ret;
|
||||
|
||||
return data;
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
|
||||
{
|
||||
struct inode *realinode;
|
||||
struct ovl_link_data *data = c;
|
||||
|
||||
if (!data)
|
||||
return;
|
||||
|
||||
realinode = data->realdentry->d_inode;
|
||||
realinode->i_op->put_link(data->realdentry, nd, data->cookie);
|
||||
kfree(data);
|
||||
}
|
||||
|
||||
static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
|
||||
{
|
||||
struct path realpath;
|
||||
struct inode *realinode;
|
||||
|
||||
ovl_path_real(dentry, &realpath);
|
||||
realinode = realpath.dentry->d_inode;
|
||||
|
||||
if (!realinode->i_op->readlink)
|
||||
return -EINVAL;
|
||||
|
||||
touch_atime(&realpath);
|
||||
|
||||
return realinode->i_op->readlink(realpath.dentry, buf, bufsiz);
|
||||
}
|
||||
|
||||
|
||||
static bool ovl_is_private_xattr(const char *name)
|
||||
{
|
||||
return strncmp(name, OVL_XATTR_PRE_NAME, OVL_XATTR_PRE_LEN) == 0;
|
||||
}
|
||||
|
||||
int ovl_setxattr(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags)
|
||||
{
|
||||
int err;
|
||||
struct dentry *upperdentry;
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = -EPERM;
|
||||
if (ovl_is_private_xattr(name))
|
||||
goto out_drop_write;
|
||||
|
||||
err = ovl_copy_up(dentry);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
upperdentry = ovl_dentry_upper(dentry);
|
||||
err = vfs_setxattr(upperdentry, name, value, size, flags);
|
||||
|
||||
out_drop_write:
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static bool ovl_need_xattr_filter(struct dentry *dentry,
|
||||
enum ovl_path_type type)
|
||||
{
|
||||
if ((type & (__OVL_PATH_PURE | __OVL_PATH_UPPER)) == __OVL_PATH_UPPER)
|
||||
return S_ISDIR(dentry->d_inode->i_mode);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
|
||||
void *value, size_t size)
|
||||
{
|
||||
struct path realpath;
|
||||
enum ovl_path_type type = ovl_path_real(dentry, &realpath);
|
||||
|
||||
if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
|
||||
return -ENODATA;
|
||||
|
||||
return vfs_getxattr(realpath.dentry, name, value, size);
|
||||
}
|
||||
|
||||
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
|
||||
{
|
||||
struct path realpath;
|
||||
enum ovl_path_type type = ovl_path_real(dentry, &realpath);
|
||||
ssize_t res;
|
||||
int off;
|
||||
|
||||
res = vfs_listxattr(realpath.dentry, list, size);
|
||||
if (res <= 0 || size == 0)
|
||||
return res;
|
||||
|
||||
if (!ovl_need_xattr_filter(dentry, type))
|
||||
return res;
|
||||
|
||||
/* filter out private xattrs */
|
||||
for (off = 0; off < res;) {
|
||||
char *s = list + off;
|
||||
size_t slen = strlen(s) + 1;
|
||||
|
||||
BUG_ON(off + slen > res);
|
||||
|
||||
if (ovl_is_private_xattr(s)) {
|
||||
res -= slen;
|
||||
memmove(s, s + slen, res - off);
|
||||
} else {
|
||||
off += slen;
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
int ovl_removexattr(struct dentry *dentry, const char *name)
|
||||
{
|
||||
int err;
|
||||
struct path realpath;
|
||||
enum ovl_path_type type = ovl_path_real(dentry, &realpath);
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = -ENODATA;
|
||||
if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
|
||||
goto out_drop_write;
|
||||
|
||||
if (!OVL_TYPE_UPPER(type)) {
|
||||
err = vfs_getxattr(realpath.dentry, name, NULL, 0);
|
||||
if (err < 0)
|
||||
goto out_drop_write;
|
||||
|
||||
err = ovl_copy_up(dentry);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
ovl_path_upper(dentry, &realpath);
|
||||
}
|
||||
|
||||
err = vfs_removexattr(realpath.dentry, name);
|
||||
out_drop_write:
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
|
||||
struct dentry *realdentry)
|
||||
{
|
||||
if (OVL_TYPE_UPPER(type))
|
||||
return false;
|
||||
|
||||
if (special_file(realdentry->d_inode->i_mode))
|
||||
return false;
|
||||
|
||||
if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int ovl_dentry_open(struct dentry *dentry, struct file *file,
|
||||
const struct cred *cred)
|
||||
{
|
||||
int err;
|
||||
struct path realpath;
|
||||
enum ovl_path_type type;
|
||||
bool want_write = false;
|
||||
|
||||
type = ovl_path_real(dentry, &realpath);
|
||||
if (!ovl_is_nocopyupw(dentry)) {
|
||||
if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) {
|
||||
want_write = true;
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (file->f_flags & O_TRUNC)
|
||||
err = ovl_copy_up_last(dentry, NULL, true);
|
||||
else
|
||||
err = ovl_copy_up(dentry);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
ovl_path_upper(dentry, &realpath);
|
||||
}
|
||||
}
|
||||
|
||||
err = vfs_open(&realpath, file, cred);
|
||||
out_drop_write:
|
||||
if (want_write)
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static const struct inode_operations ovl_file_inode_operations = {
|
||||
.setattr = ovl_setattr,
|
||||
.permission = ovl_permission,
|
||||
.getattr = ovl_getattr,
|
||||
.setxattr = ovl_setxattr,
|
||||
.getxattr = ovl_getxattr,
|
||||
.listxattr = ovl_listxattr,
|
||||
.removexattr = ovl_removexattr,
|
||||
.dentry_open = ovl_dentry_open,
|
||||
};
|
||||
|
||||
static const struct inode_operations ovl_symlink_inode_operations = {
|
||||
.setattr = ovl_setattr,
|
||||
.follow_link = ovl_follow_link,
|
||||
.put_link = ovl_put_link,
|
||||
.readlink = ovl_readlink,
|
||||
.getattr = ovl_getattr,
|
||||
.setxattr = ovl_setxattr,
|
||||
.getxattr = ovl_getxattr,
|
||||
.listxattr = ovl_listxattr,
|
||||
.removexattr = ovl_removexattr,
|
||||
};
|
||||
|
||||
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
|
||||
struct ovl_entry *oe)
|
||||
{
|
||||
struct inode *inode;
|
||||
|
||||
inode = new_inode(sb);
|
||||
if (!inode)
|
||||
return NULL;
|
||||
|
||||
mode &= S_IFMT;
|
||||
|
||||
inode->i_ino = get_next_ino();
|
||||
inode->i_mode = mode;
|
||||
inode->i_flags |= S_NOATIME | S_NOCMTIME;
|
||||
|
||||
switch (mode) {
|
||||
case S_IFDIR:
|
||||
inode->i_private = oe;
|
||||
inode->i_op = &ovl_dir_inode_operations;
|
||||
inode->i_fop = &ovl_dir_operations;
|
||||
break;
|
||||
|
||||
case S_IFLNK:
|
||||
inode->i_op = &ovl_symlink_inode_operations;
|
||||
break;
|
||||
|
||||
case S_IFREG:
|
||||
case S_IFSOCK:
|
||||
case S_IFBLK:
|
||||
case S_IFCHR:
|
||||
case S_IFIFO:
|
||||
inode->i_op = &ovl_file_inode_operations;
|
||||
break;
|
||||
|
||||
default:
|
||||
WARN(1, "illegal file type: %i\n", mode);
|
||||
iput(inode);
|
||||
inode = NULL;
|
||||
}
|
||||
|
||||
return inode;
|
||||
}
|
||||
200
executer/kernel/mcoverlayfs/linux-4.0.9/overlayfs.h
Normal file
200
executer/kernel/mcoverlayfs/linux-4.0.9/overlayfs.h
Normal file
@ -0,0 +1,200 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
|
||||
struct ovl_entry;
|
||||
|
||||
enum ovl_path_type {
|
||||
__OVL_PATH_PURE = (1 << 0),
|
||||
__OVL_PATH_UPPER = (1 << 1),
|
||||
__OVL_PATH_MERGE = (1 << 2),
|
||||
};
|
||||
|
||||
#define OVL_TYPE_UPPER(type) ((type) & __OVL_PATH_UPPER)
|
||||
#define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE)
|
||||
#define OVL_TYPE_PURE_UPPER(type) ((type) & __OVL_PATH_PURE)
|
||||
#define OVL_TYPE_MERGE_OR_LOWER(type) \
|
||||
(OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type))
|
||||
|
||||
#define OVL_XATTR_PRE_NAME "trusted.overlay."
|
||||
#define OVL_XATTR_PRE_LEN 16
|
||||
#define OVL_XATTR_OPAQUE OVL_XATTR_PRE_NAME"opaque"
|
||||
|
||||
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
int err = vfs_rmdir(dir, dentry);
|
||||
pr_debug("rmdir(%pd2) = %i\n", dentry, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
int err = vfs_unlink(dir, dentry, NULL);
|
||||
pr_debug("unlink(%pd2) = %i\n", dentry, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
|
||||
struct dentry *new_dentry, bool debug)
|
||||
{
|
||||
int err = vfs_link(old_dentry, dir, new_dentry, NULL);
|
||||
if (debug) {
|
||||
pr_debug("link(%pd2, %pd2) = %i\n",
|
||||
old_dentry, new_dentry, err);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
|
||||
umode_t mode, bool debug)
|
||||
{
|
||||
int err = vfs_create(dir, dentry, mode, true);
|
||||
if (debug)
|
||||
pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
|
||||
umode_t mode, bool debug)
|
||||
{
|
||||
int err = vfs_mkdir(dir, dentry, mode);
|
||||
if (debug)
|
||||
pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
|
||||
umode_t mode, dev_t dev, bool debug)
|
||||
{
|
||||
int err = vfs_mknod(dir, dentry, mode, dev);
|
||||
if (debug) {
|
||||
pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n",
|
||||
dentry, mode, dev, err);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
|
||||
const char *oldname, bool debug)
|
||||
{
|
||||
int err = vfs_symlink(dir, dentry, oldname);
|
||||
if (debug)
|
||||
pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags)
|
||||
{
|
||||
int err = vfs_setxattr(dentry, name, value, size, flags);
|
||||
pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n",
|
||||
dentry, name, (int) size, (char *) value, flags, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
|
||||
{
|
||||
int err = vfs_removexattr(dentry, name);
|
||||
pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
|
||||
struct inode *newdir, struct dentry *newdentry,
|
||||
unsigned int flags)
|
||||
{
|
||||
int err;
|
||||
|
||||
pr_debug("rename2(%pd2, %pd2, 0x%x)\n",
|
||||
olddentry, newdentry, flags);
|
||||
|
||||
err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);
|
||||
|
||||
if (err) {
|
||||
pr_debug("...rename2(%pd2, %pd2, ...) = %i\n",
|
||||
olddentry, newdentry, err);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
int err = vfs_whiteout(dir, dentry);
|
||||
pr_debug("whiteout(%pd2) = %i\n", dentry, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
bool ovl_is_nocopyupw(struct dentry *dentry);
|
||||
enum ovl_path_type ovl_path_type(struct dentry *dentry);
|
||||
u64 ovl_dentry_version_get(struct dentry *dentry);
|
||||
void ovl_dentry_version_inc(struct dentry *dentry);
|
||||
void ovl_path_upper(struct dentry *dentry, struct path *path);
|
||||
void ovl_path_lower(struct dentry *dentry, struct path *path);
|
||||
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
|
||||
int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
|
||||
struct dentry *ovl_dentry_upper(struct dentry *dentry);
|
||||
struct dentry *ovl_dentry_lower(struct dentry *dentry);
|
||||
struct dentry *ovl_dentry_real(struct dentry *dentry);
|
||||
struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper);
|
||||
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
|
||||
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
|
||||
struct dentry *ovl_workdir(struct dentry *dentry);
|
||||
int ovl_want_write(struct dentry *dentry);
|
||||
void ovl_drop_write(struct dentry *dentry);
|
||||
bool ovl_dentry_is_opaque(struct dentry *dentry);
|
||||
void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque);
|
||||
bool ovl_is_whiteout(struct dentry *dentry);
|
||||
void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
|
||||
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
|
||||
unsigned int flags);
|
||||
struct file *ovl_path_open(struct path *path, int flags);
|
||||
|
||||
struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry,
|
||||
struct kstat *stat, const char *link);
|
||||
|
||||
/* readdir.c */
|
||||
extern const struct file_operations ovl_dir_operations;
|
||||
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
|
||||
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
|
||||
void ovl_cache_free(struct list_head *list);
|
||||
|
||||
/* inode.c */
|
||||
int ovl_setattr(struct dentry *dentry, struct iattr *attr);
|
||||
int ovl_permission(struct inode *inode, int mask);
|
||||
int ovl_setxattr(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags);
|
||||
ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
|
||||
void *value, size_t size);
|
||||
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
|
||||
int ovl_removexattr(struct dentry *dentry, const char *name);
|
||||
|
||||
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
|
||||
struct ovl_entry *oe);
|
||||
static inline void ovl_copyattr(struct inode *from, struct inode *to)
|
||||
{
|
||||
to->i_uid = from->i_uid;
|
||||
to->i_gid = from->i_gid;
|
||||
}
|
||||
|
||||
/* dir.c */
|
||||
extern const struct inode_operations ovl_dir_inode_operations;
|
||||
struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry);
|
||||
int ovl_create_real(struct inode *dir, struct dentry *newdentry,
|
||||
struct kstat *stat, const char *link,
|
||||
struct dentry *hardlink, bool debug);
|
||||
void ovl_cleanup(struct inode *dir, struct dentry *dentry);
|
||||
|
||||
/* copy_up.c */
|
||||
int ovl_copy_up(struct dentry *dentry);
|
||||
int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
|
||||
struct path *lowerpath, struct kstat *stat,
|
||||
struct iattr *attr);
|
||||
int ovl_copy_xattr(struct dentry *old, struct dentry *new);
|
||||
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
|
||||
557
executer/kernel/mcoverlayfs/linux-4.0.9/readdir.c
Normal file
557
executer/kernel/mcoverlayfs/linux-4.0.9/readdir.c
Normal file
@ -0,0 +1,557 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/cred.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
struct ovl_cache_entry {
|
||||
unsigned int len;
|
||||
unsigned int type;
|
||||
u64 ino;
|
||||
struct list_head l_node;
|
||||
struct rb_node node;
|
||||
bool is_whiteout;
|
||||
char name[];
|
||||
};
|
||||
|
||||
struct ovl_dir_cache {
|
||||
long refcount;
|
||||
u64 version;
|
||||
struct list_head entries;
|
||||
};
|
||||
|
||||
struct ovl_readdir_data {
|
||||
struct dir_context ctx;
|
||||
bool is_merge;
|
||||
struct rb_root root;
|
||||
struct list_head *list;
|
||||
struct list_head middle;
|
||||
struct dentry *dir;
|
||||
int count;
|
||||
int err;
|
||||
};
|
||||
|
||||
struct ovl_dir_file {
|
||||
bool is_real;
|
||||
bool is_upper;
|
||||
struct ovl_dir_cache *cache;
|
||||
struct list_head *cursor;
|
||||
struct file *realfile;
|
||||
struct file *upperfile;
|
||||
};
|
||||
|
||||
static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
|
||||
{
|
||||
return container_of(n, struct ovl_cache_entry, node);
|
||||
}
|
||||
|
||||
static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
|
||||
const char *name, int len)
|
||||
{
|
||||
struct rb_node *node = root->rb_node;
|
||||
int cmp;
|
||||
|
||||
while (node) {
|
||||
struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
|
||||
|
||||
cmp = strncmp(name, p->name, len);
|
||||
if (cmp > 0)
|
||||
node = p->node.rb_right;
|
||||
else if (cmp < 0 || len < p->len)
|
||||
node = p->node.rb_left;
|
||||
else
|
||||
return p;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir,
|
||||
const char *name, int len,
|
||||
u64 ino, unsigned int d_type)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
|
||||
|
||||
p = kmalloc(size, GFP_KERNEL);
|
||||
if (!p)
|
||||
return NULL;
|
||||
|
||||
memcpy(p->name, name, len);
|
||||
p->name[len] = '\0';
|
||||
p->len = len;
|
||||
p->type = d_type;
|
||||
p->ino = ino;
|
||||
p->is_whiteout = false;
|
||||
|
||||
if (d_type == DT_CHR) {
|
||||
struct dentry *dentry;
|
||||
const struct cred *old_cred;
|
||||
struct cred *override_cred;
|
||||
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred) {
|
||||
kfree(p);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* CAP_DAC_OVERRIDE for lookup
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
old_cred = override_creds(override_cred);
|
||||
|
||||
dentry = lookup_one_len(name, dir, len);
|
||||
if (!IS_ERR(dentry)) {
|
||||
p->is_whiteout = ovl_is_whiteout(dentry);
|
||||
dput(dentry);
|
||||
}
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
|
||||
const char *name, int len, u64 ino,
|
||||
unsigned int d_type)
|
||||
{
|
||||
struct rb_node **newp = &rdd->root.rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
while (*newp) {
|
||||
int cmp;
|
||||
struct ovl_cache_entry *tmp;
|
||||
|
||||
parent = *newp;
|
||||
tmp = ovl_cache_entry_from_node(*newp);
|
||||
cmp = strncmp(name, tmp->name, len);
|
||||
if (cmp > 0)
|
||||
newp = &tmp->node.rb_right;
|
||||
else if (cmp < 0 || len < tmp->len)
|
||||
newp = &tmp->node.rb_left;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
p = ovl_cache_entry_new(rdd->dir, name, len, ino, d_type);
|
||||
if (p == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
list_add_tail(&p->l_node, rdd->list);
|
||||
rb_link_node(&p->node, parent, newp);
|
||||
rb_insert_color(&p->node, &rdd->root);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_fill_lower(struct ovl_readdir_data *rdd,
|
||||
const char *name, int namelen,
|
||||
loff_t offset, u64 ino, unsigned int d_type)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
p = ovl_cache_entry_find(&rdd->root, name, namelen);
|
||||
if (p) {
|
||||
list_move_tail(&p->l_node, &rdd->middle);
|
||||
} else {
|
||||
p = ovl_cache_entry_new(rdd->dir, name, namelen, ino, d_type);
|
||||
if (p == NULL)
|
||||
rdd->err = -ENOMEM;
|
||||
else
|
||||
list_add_tail(&p->l_node, &rdd->middle);
|
||||
}
|
||||
|
||||
return rdd->err;
|
||||
}
|
||||
|
||||
void ovl_cache_free(struct list_head *list)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
struct ovl_cache_entry *n;
|
||||
|
||||
list_for_each_entry_safe(p, n, list, l_node)
|
||||
kfree(p);
|
||||
|
||||
INIT_LIST_HEAD(list);
|
||||
}
|
||||
|
||||
static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
|
||||
{
|
||||
struct ovl_dir_cache *cache = od->cache;
|
||||
|
||||
WARN_ON(cache->refcount <= 0);
|
||||
cache->refcount--;
|
||||
if (!cache->refcount) {
|
||||
if (ovl_dir_cache(dentry) == cache)
|
||||
ovl_set_dir_cache(dentry, NULL);
|
||||
|
||||
ovl_cache_free(&cache->entries);
|
||||
kfree(cache);
|
||||
}
|
||||
}
|
||||
|
||||
static int ovl_fill_merge(struct dir_context *ctx, const char *name,
|
||||
int namelen, loff_t offset, u64 ino,
|
||||
unsigned int d_type)
|
||||
{
|
||||
struct ovl_readdir_data *rdd =
|
||||
container_of(ctx, struct ovl_readdir_data, ctx);
|
||||
|
||||
rdd->count++;
|
||||
if (!rdd->is_merge)
|
||||
return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
|
||||
else
|
||||
return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type);
|
||||
}
|
||||
|
||||
static inline int ovl_dir_read(struct path *realpath,
|
||||
struct ovl_readdir_data *rdd)
|
||||
{
|
||||
struct file *realfile;
|
||||
int err;
|
||||
|
||||
realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
|
||||
if (IS_ERR(realfile))
|
||||
return PTR_ERR(realfile);
|
||||
|
||||
rdd->dir = realpath->dentry;
|
||||
rdd->ctx.pos = 0;
|
||||
do {
|
||||
rdd->count = 0;
|
||||
rdd->err = 0;
|
||||
err = iterate_dir(realfile, &rdd->ctx);
|
||||
if (err >= 0)
|
||||
err = rdd->err;
|
||||
} while (!err && rdd->count);
|
||||
fput(realfile);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void ovl_dir_reset(struct file *file)
|
||||
{
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
struct ovl_dir_cache *cache = od->cache;
|
||||
struct dentry *dentry = file->f_path.dentry;
|
||||
enum ovl_path_type type = ovl_path_type(dentry);
|
||||
|
||||
if (cache && ovl_dentry_version_get(dentry) != cache->version) {
|
||||
ovl_cache_put(od, dentry);
|
||||
od->cache = NULL;
|
||||
od->cursor = NULL;
|
||||
}
|
||||
WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type));
|
||||
if (od->is_real && OVL_TYPE_MERGE(type))
|
||||
od->is_real = false;
|
||||
}
|
||||
|
||||
static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
|
||||
{
|
||||
int err;
|
||||
struct path realpath;
|
||||
struct ovl_readdir_data rdd = {
|
||||
.ctx.actor = ovl_fill_merge,
|
||||
.list = list,
|
||||
.root = RB_ROOT,
|
||||
.is_merge = false,
|
||||
};
|
||||
int idx, next;
|
||||
|
||||
for (idx = 0; idx != -1; idx = next) {
|
||||
next = ovl_path_next(idx, dentry, &realpath);
|
||||
|
||||
if (next != -1) {
|
||||
err = ovl_dir_read(&realpath, &rdd);
|
||||
if (err)
|
||||
break;
|
||||
} else {
|
||||
/*
|
||||
* Insert lowest layer entries before upper ones, this
|
||||
* allows offsets to be reasonably constant
|
||||
*/
|
||||
list_add(&rdd.middle, rdd.list);
|
||||
rdd.is_merge = true;
|
||||
err = ovl_dir_read(&realpath, &rdd);
|
||||
list_del(&rdd.middle);
|
||||
}
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
|
||||
{
|
||||
struct list_head *p;
|
||||
loff_t off = 0;
|
||||
|
||||
list_for_each(p, &od->cache->entries) {
|
||||
if (off >= pos)
|
||||
break;
|
||||
off++;
|
||||
}
|
||||
/* Cursor is safe since the cache is stable */
|
||||
od->cursor = p;
|
||||
}
|
||||
|
||||
static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
|
||||
{
|
||||
int res;
|
||||
struct ovl_dir_cache *cache;
|
||||
|
||||
cache = ovl_dir_cache(dentry);
|
||||
if (cache && ovl_dentry_version_get(dentry) == cache->version) {
|
||||
cache->refcount++;
|
||||
return cache;
|
||||
}
|
||||
ovl_set_dir_cache(dentry, NULL);
|
||||
|
||||
cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
|
||||
if (!cache)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
cache->refcount = 1;
|
||||
INIT_LIST_HEAD(&cache->entries);
|
||||
|
||||
res = ovl_dir_read_merged(dentry, &cache->entries);
|
||||
if (res) {
|
||||
ovl_cache_free(&cache->entries);
|
||||
kfree(cache);
|
||||
return ERR_PTR(res);
|
||||
}
|
||||
|
||||
cache->version = ovl_dentry_version_get(dentry);
|
||||
ovl_set_dir_cache(dentry, cache);
|
||||
|
||||
return cache;
|
||||
}
|
||||
|
||||
static int ovl_iterate(struct file *file, struct dir_context *ctx)
|
||||
{
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
struct dentry *dentry = file->f_path.dentry;
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
if (!ctx->pos)
|
||||
ovl_dir_reset(file);
|
||||
|
||||
if (od->is_real)
|
||||
return iterate_dir(od->realfile, ctx);
|
||||
|
||||
if (!od->cache) {
|
||||
struct ovl_dir_cache *cache;
|
||||
|
||||
cache = ovl_cache_get(dentry);
|
||||
if (IS_ERR(cache))
|
||||
return PTR_ERR(cache);
|
||||
|
||||
od->cache = cache;
|
||||
ovl_seek_cursor(od, ctx->pos);
|
||||
}
|
||||
|
||||
while (od->cursor != &od->cache->entries) {
|
||||
p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
|
||||
if (!p->is_whiteout)
|
||||
if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
|
||||
break;
|
||||
od->cursor = p->l_node.next;
|
||||
ctx->pos++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
|
||||
{
|
||||
loff_t res;
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
|
||||
mutex_lock(&file_inode(file)->i_mutex);
|
||||
if (!file->f_pos)
|
||||
ovl_dir_reset(file);
|
||||
|
||||
if (od->is_real) {
|
||||
res = vfs_llseek(od->realfile, offset, origin);
|
||||
file->f_pos = od->realfile->f_pos;
|
||||
} else {
|
||||
res = -EINVAL;
|
||||
|
||||
switch (origin) {
|
||||
case SEEK_CUR:
|
||||
offset += file->f_pos;
|
||||
break;
|
||||
case SEEK_SET:
|
||||
break;
|
||||
default:
|
||||
goto out_unlock;
|
||||
}
|
||||
if (offset < 0)
|
||||
goto out_unlock;
|
||||
|
||||
if (offset != file->f_pos) {
|
||||
file->f_pos = offset;
|
||||
if (od->cache)
|
||||
ovl_seek_cursor(od, offset);
|
||||
}
|
||||
res = offset;
|
||||
}
|
||||
out_unlock:
|
||||
mutex_unlock(&file_inode(file)->i_mutex);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
|
||||
int datasync)
|
||||
{
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
struct dentry *dentry = file->f_path.dentry;
|
||||
struct file *realfile = od->realfile;
|
||||
|
||||
/*
|
||||
* Need to check if we started out being a lower dir, but got copied up
|
||||
*/
|
||||
if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) {
|
||||
struct inode *inode = file_inode(file);
|
||||
|
||||
realfile = lockless_dereference(od->upperfile);
|
||||
if (!realfile) {
|
||||
struct path upperpath;
|
||||
|
||||
ovl_path_upper(dentry, &upperpath);
|
||||
realfile = ovl_path_open(&upperpath, O_RDONLY);
|
||||
smp_mb__before_spinlock();
|
||||
mutex_lock(&inode->i_mutex);
|
||||
if (!od->upperfile) {
|
||||
if (IS_ERR(realfile)) {
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
return PTR_ERR(realfile);
|
||||
}
|
||||
od->upperfile = realfile;
|
||||
} else {
|
||||
/* somebody has beaten us to it */
|
||||
if (!IS_ERR(realfile))
|
||||
fput(realfile);
|
||||
realfile = od->upperfile;
|
||||
}
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
}
|
||||
}
|
||||
|
||||
return vfs_fsync_range(realfile, start, end, datasync);
|
||||
}
|
||||
|
||||
static int ovl_dir_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
|
||||
if (od->cache) {
|
||||
mutex_lock(&inode->i_mutex);
|
||||
ovl_cache_put(od, file->f_path.dentry);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
}
|
||||
fput(od->realfile);
|
||||
if (od->upperfile)
|
||||
fput(od->upperfile);
|
||||
kfree(od);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_dir_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct path realpath;
|
||||
struct file *realfile;
|
||||
struct ovl_dir_file *od;
|
||||
enum ovl_path_type type;
|
||||
|
||||
od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
|
||||
if (!od)
|
||||
return -ENOMEM;
|
||||
|
||||
type = ovl_path_real(file->f_path.dentry, &realpath);
|
||||
realfile = ovl_path_open(&realpath, file->f_flags);
|
||||
if (IS_ERR(realfile)) {
|
||||
kfree(od);
|
||||
return PTR_ERR(realfile);
|
||||
}
|
||||
od->realfile = realfile;
|
||||
od->is_real = !OVL_TYPE_MERGE(type);
|
||||
od->is_upper = OVL_TYPE_UPPER(type);
|
||||
file->private_data = od;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct file_operations ovl_dir_operations = {
|
||||
.read = generic_read_dir,
|
||||
.open = ovl_dir_open,
|
||||
.iterate = ovl_iterate,
|
||||
.llseek = ovl_dir_llseek,
|
||||
.fsync = ovl_dir_fsync,
|
||||
.release = ovl_dir_release,
|
||||
};
|
||||
|
||||
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
|
||||
{
|
||||
int err;
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
err = ovl_dir_read_merged(dentry, list);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = 0;
|
||||
|
||||
list_for_each_entry(p, list, l_node) {
|
||||
if (p->is_whiteout)
|
||||
continue;
|
||||
|
||||
if (p->name[0] == '.') {
|
||||
if (p->len == 1)
|
||||
continue;
|
||||
if (p->len == 2 && p->name[1] == '.')
|
||||
continue;
|
||||
}
|
||||
err = -ENOTEMPTY;
|
||||
break;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD);
|
||||
list_for_each_entry(p, list, l_node) {
|
||||
struct dentry *dentry;
|
||||
|
||||
if (!p->is_whiteout)
|
||||
continue;
|
||||
|
||||
dentry = lookup_one_len(p->name, upper, p->len);
|
||||
if (IS_ERR(dentry)) {
|
||||
pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
|
||||
upper->d_name.name, p->len, p->name,
|
||||
(int) PTR_ERR(dentry));
|
||||
continue;
|
||||
}
|
||||
ovl_cleanup(upper->d_inode, dentry);
|
||||
dput(dentry);
|
||||
}
|
||||
mutex_unlock(&upper->d_inode->i_mutex);
|
||||
}
|
||||
1094
executer/kernel/mcoverlayfs/linux-4.0.9/super.c
Normal file
1094
executer/kernel/mcoverlayfs/linux-4.0.9/super.c
Normal file
File diff suppressed because it is too large
Load Diff
21
executer/kernel/mcoverlayfs/linux-4.6.7/Makefile.in
Normal file
21
executer/kernel/mcoverlayfs/linux-4.6.7/Makefile.in
Normal file
@ -0,0 +1,21 @@
|
||||
KDIR ?= @KDIR@
|
||||
ARCH ?= @ARCH@
|
||||
KMODDIR = @KMODDIR@
|
||||
src = @abs_srcdir@
|
||||
|
||||
obj-m += mcoverlay.o
|
||||
|
||||
mcoverlay-y := copy_up.o dir.o inode.o readdir.o super.o
|
||||
|
||||
.PHONY: clean install modules
|
||||
|
||||
modules:
|
||||
$(MAKE) -C $(KDIR) M=$(PWD) SUBDIRS=$(PWD) ARCH=$(ARCH) modules
|
||||
|
||||
clean:
|
||||
$(RM) .*.cmd *.mod.c *.o *.ko* Module.symvers modules.order -r .tmp*
|
||||
|
||||
install:
|
||||
mkdir -p -m 755 $(KMODDIR)
|
||||
install -m 644 mcoverlay.ko $(KMODDIR)
|
||||
|
||||
460
executer/kernel/mcoverlayfs/linux-4.6.7/copy_up.c
Normal file
460
executer/kernel/mcoverlayfs/linux-4.6.7/copy_up.c
Normal file
@ -0,0 +1,460 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/splice.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/fdtable.h>
|
||||
#include <linux/ratelimit.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
|
||||
|
||||
static bool __read_mostly ovl_check_copy_up;
|
||||
module_param_named(check_copy_up, ovl_check_copy_up, bool,
|
||||
S_IWUSR | S_IRUGO);
|
||||
MODULE_PARM_DESC(ovl_check_copy_up,
|
||||
"Warn on copy-up when causing process also has a R/O fd open");
|
||||
|
||||
static int ovl_check_fd(const void *data, struct file *f, unsigned int fd)
|
||||
{
|
||||
const struct dentry *dentry = data;
|
||||
|
||||
if (f->f_inode == d_inode(dentry))
|
||||
pr_warn_ratelimited("overlayfs: Warning: Copying up %pD, but open R/O on fd %u which will cease to be coherent [pid=%d %s]\n",
|
||||
f, fd, current->pid, current->comm);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check the fds open by this process and warn if something like the following
|
||||
* scenario is about to occur:
|
||||
*
|
||||
* fd1 = open("foo", O_RDONLY);
|
||||
* fd2 = open("foo", O_RDWR);
|
||||
*/
|
||||
static void ovl_do_check_copy_up(struct dentry *dentry)
|
||||
{
|
||||
if (ovl_check_copy_up)
|
||||
iterate_fd(current->files, 0, ovl_check_fd, dentry);
|
||||
}
|
||||
|
||||
int ovl_copy_xattr(struct dentry *old, struct dentry *new, unsigned opt)
|
||||
{
|
||||
ssize_t list_size, size, value_size = 0;
|
||||
char *buf, *name, *value = NULL;
|
||||
int uninitialized_var(error);
|
||||
|
||||
if (!old->d_inode->i_op->getxattr ||
|
||||
!new->d_inode->i_op->getxattr)
|
||||
return 0;
|
||||
|
||||
list_size = vfs_listxattr(old, NULL, 0);
|
||||
if (list_size <= 0) {
|
||||
if (list_size == -EOPNOTSUPP)
|
||||
return 0;
|
||||
return list_size;
|
||||
}
|
||||
|
||||
buf = kzalloc(list_size, GFP_KERNEL);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
|
||||
list_size = vfs_listxattr(old, buf, list_size);
|
||||
if (list_size <= 0) {
|
||||
error = list_size;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
|
||||
retry:
|
||||
size = vfs_getxattr(old, name, value, value_size);
|
||||
if (size == -ERANGE)
|
||||
size = vfs_getxattr(old, name, NULL, 0);
|
||||
|
||||
if (size < 0) {
|
||||
if (OVL_OPT_NOFSCHECK(opt)) {
|
||||
OVL_DEBUG("fail: old=%pd4, i_ino=%lu, name=%s\n",
|
||||
old, old->d_inode->i_ino, name);
|
||||
continue;
|
||||
} else {
|
||||
error = size;
|
||||
break;
|
||||
}
|
||||
}
|
||||
OVL_DEBUG("success: old=%pd4, i_ino=%lu, name=%s\n",
|
||||
old, old->d_inode->i_ino, name);
|
||||
|
||||
if (size > value_size) {
|
||||
void *new;
|
||||
|
||||
new = krealloc(value, size, GFP_KERNEL);
|
||||
if (!new) {
|
||||
error = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
value = new;
|
||||
value_size = size;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
error = vfs_setxattr(new, name, value, size, 0);
|
||||
if (error)
|
||||
break;
|
||||
}
|
||||
kfree(value);
|
||||
out:
|
||||
kfree(buf);
|
||||
return error;
|
||||
}
|
||||
|
||||
static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
|
||||
{
|
||||
struct file *old_file;
|
||||
struct file *new_file;
|
||||
loff_t old_pos = 0;
|
||||
loff_t new_pos = 0;
|
||||
int error = 0;
|
||||
|
||||
if (len == 0)
|
||||
return 0;
|
||||
|
||||
old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY);
|
||||
if (IS_ERR(old_file))
|
||||
return PTR_ERR(old_file);
|
||||
|
||||
new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY);
|
||||
if (IS_ERR(new_file)) {
|
||||
error = PTR_ERR(new_file);
|
||||
goto out_fput;
|
||||
}
|
||||
|
||||
/* FIXME: copy up sparse files efficiently */
|
||||
while (len) {
|
||||
size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
|
||||
long bytes;
|
||||
|
||||
if (len < this_len)
|
||||
this_len = len;
|
||||
|
||||
if (signal_pending_state(TASK_KILLABLE, current)) {
|
||||
error = -EINTR;
|
||||
break;
|
||||
}
|
||||
|
||||
bytes = do_splice_direct(old_file, &old_pos,
|
||||
new_file, &new_pos,
|
||||
this_len, SPLICE_F_MOVE);
|
||||
if (bytes <= 0) {
|
||||
error = bytes;
|
||||
break;
|
||||
}
|
||||
WARN_ON(old_pos != new_pos);
|
||||
|
||||
len -= bytes;
|
||||
}
|
||||
|
||||
fput(new_file);
|
||||
out_fput:
|
||||
fput(old_file);
|
||||
return error;
|
||||
}
|
||||
|
||||
static char *ovl_read_symlink(struct dentry *realdentry)
|
||||
{
|
||||
int res;
|
||||
char *buf;
|
||||
struct inode *inode = realdentry->d_inode;
|
||||
mm_segment_t old_fs;
|
||||
|
||||
res = -EINVAL;
|
||||
if (!inode->i_op->readlink)
|
||||
goto err;
|
||||
|
||||
res = -ENOMEM;
|
||||
buf = (char *) __get_free_page(GFP_KERNEL);
|
||||
if (!buf)
|
||||
goto err;
|
||||
|
||||
old_fs = get_fs();
|
||||
set_fs(get_ds());
|
||||
/* The cast to a user pointer is valid due to the set_fs() */
|
||||
res = inode->i_op->readlink(realdentry,
|
||||
(char __user *)buf, PAGE_SIZE - 1);
|
||||
set_fs(old_fs);
|
||||
if (res < 0) {
|
||||
free_page((unsigned long) buf);
|
||||
goto err;
|
||||
}
|
||||
buf[res] = '\0';
|
||||
|
||||
return buf;
|
||||
|
||||
err:
|
||||
return ERR_PTR(res);
|
||||
}
|
||||
|
||||
static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
|
||||
{
|
||||
struct iattr attr = {
|
||||
.ia_valid =
|
||||
ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
|
||||
.ia_atime = stat->atime,
|
||||
.ia_mtime = stat->mtime,
|
||||
};
|
||||
|
||||
return notify_change(upperdentry, &attr, NULL);
|
||||
}
|
||||
|
||||
int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (!S_ISLNK(stat->mode)) {
|
||||
struct iattr attr = {
|
||||
.ia_valid = ATTR_MODE,
|
||||
.ia_mode = stat->mode,
|
||||
};
|
||||
err = notify_change(upperdentry, &attr, NULL);
|
||||
}
|
||||
if (!err) {
|
||||
struct iattr attr = {
|
||||
.ia_valid = ATTR_UID | ATTR_GID,
|
||||
.ia_uid = stat->uid,
|
||||
.ia_gid = stat->gid,
|
||||
};
|
||||
err = notify_change(upperdentry, &attr, NULL);
|
||||
}
|
||||
if (!err)
|
||||
ovl_set_timestamps(upperdentry, stat);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
|
||||
struct dentry *dentry, struct path *lowerpath,
|
||||
struct kstat *stat, const char *link)
|
||||
{
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct dentry *newdentry = NULL;
|
||||
struct dentry *upper = NULL;
|
||||
umode_t mode = stat->mode;
|
||||
unsigned opt = ovl_get_config_opt(dentry);
|
||||
int err;
|
||||
|
||||
newdentry = ovl_lookup_temp(workdir, dentry);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out;
|
||||
|
||||
upper = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(upper);
|
||||
if (IS_ERR(upper))
|
||||
goto out1;
|
||||
|
||||
/* Can't properly set mode on creation because of the umask */
|
||||
stat->mode &= S_IFMT;
|
||||
err = ovl_create_real(wdir, newdentry, stat, link, NULL, true);
|
||||
stat->mode = mode;
|
||||
if (err)
|
||||
goto out2;
|
||||
|
||||
if (S_ISREG(stat->mode)) {
|
||||
struct path upperpath;
|
||||
|
||||
ovl_path_upper(dentry, &upperpath);
|
||||
BUG_ON(upperpath.dentry != NULL);
|
||||
upperpath.dentry = newdentry;
|
||||
|
||||
err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
}
|
||||
|
||||
err = ovl_copy_xattr(lowerpath->dentry, newdentry, opt);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
inode_lock(newdentry->d_inode);
|
||||
err = ovl_set_attr(newdentry, stat);
|
||||
inode_unlock(newdentry->d_inode);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
ovl_dentry_update(dentry, newdentry);
|
||||
newdentry = NULL;
|
||||
|
||||
/*
|
||||
* Non-directores become opaque when copied up.
|
||||
*/
|
||||
if (!S_ISDIR(stat->mode))
|
||||
ovl_dentry_set_opaque(dentry, true);
|
||||
out2:
|
||||
dput(upper);
|
||||
out1:
|
||||
dput(newdentry);
|
||||
out:
|
||||
return err;
|
||||
|
||||
out_cleanup:
|
||||
ovl_cleanup(wdir, newdentry);
|
||||
goto out2;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy up a single dentry
|
||||
*
|
||||
* Directory renames only allowed on "pure upper" (already created on
|
||||
* upper filesystem, never copied up). Directories which are on lower or
|
||||
* are merged may not be renamed. For these -EXDEV is returned and
|
||||
* userspace has to deal with it. This means, when copying up a
|
||||
* directory we can rely on it and ancestors being stable.
|
||||
*
|
||||
* Non-directory renames start with copy up of source if necessary. The
|
||||
* actual rename will only proceed once the copy up was successful. Copy
|
||||
* up uses upper parent i_mutex for exclusion. Since rename can change
|
||||
* d_parent it is possible that the copy up will lock the old parent. At
|
||||
* that point the file will have already been copied up anyway.
|
||||
*/
|
||||
int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
|
||||
struct path *lowerpath, struct kstat *stat)
|
||||
{
|
||||
struct dentry *workdir = ovl_workdir(dentry);
|
||||
int err;
|
||||
struct kstat pstat;
|
||||
struct path parentpath;
|
||||
struct dentry *upperdir;
|
||||
struct dentry *upperdentry;
|
||||
const struct cred *old_cred;
|
||||
struct cred *override_cred;
|
||||
char *link = NULL;
|
||||
|
||||
if (WARN_ON(!workdir))
|
||||
return -EROFS;
|
||||
|
||||
ovl_do_check_copy_up(lowerpath->dentry);
|
||||
|
||||
ovl_path_upper(parent, &parentpath);
|
||||
upperdir = parentpath.dentry;
|
||||
|
||||
err = vfs_getattr(&parentpath, &pstat);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (S_ISLNK(stat->mode)) {
|
||||
link = ovl_read_symlink(lowerpath->dentry);
|
||||
if (IS_ERR(link))
|
||||
return PTR_ERR(link);
|
||||
}
|
||||
|
||||
err = -ENOMEM;
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
goto out_free_link;
|
||||
|
||||
override_cred->fsuid = stat->uid;
|
||||
override_cred->fsgid = stat->gid;
|
||||
/*
|
||||
* CAP_SYS_ADMIN for copying up extended attributes
|
||||
* CAP_DAC_OVERRIDE for create
|
||||
* CAP_FOWNER for chmod, timestamp update
|
||||
* CAP_FSETID for chmod
|
||||
* CAP_CHOWN for chown
|
||||
* CAP_MKNOD for mknod
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
cap_raise(override_cred->cap_effective, CAP_FOWNER);
|
||||
cap_raise(override_cred->cap_effective, CAP_FSETID);
|
||||
cap_raise(override_cred->cap_effective, CAP_CHOWN);
|
||||
cap_raise(override_cred->cap_effective, CAP_MKNOD);
|
||||
old_cred = override_creds(override_cred);
|
||||
|
||||
err = -EIO;
|
||||
if (lock_rename(workdir, upperdir) != NULL) {
|
||||
pr_err("overlayfs: failed to lock workdir+upperdir\n");
|
||||
goto out_unlock;
|
||||
}
|
||||
upperdentry = ovl_dentry_upper(dentry);
|
||||
if (upperdentry) {
|
||||
/* Raced with another copy-up? Nothing to do, then... */
|
||||
err = 0;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
|
||||
stat, link);
|
||||
if (!err) {
|
||||
/* Restore timestamps on parent (best effort) */
|
||||
ovl_set_timestamps(upperdir, &pstat);
|
||||
}
|
||||
out_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
|
||||
out_free_link:
|
||||
if (link)
|
||||
free_page((unsigned long) link);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int ovl_copy_up(struct dentry *dentry)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = 0;
|
||||
while (!err) {
|
||||
struct dentry *next;
|
||||
struct dentry *parent;
|
||||
struct path lowerpath;
|
||||
struct kstat stat;
|
||||
enum ovl_path_type type = ovl_path_type(dentry);
|
||||
|
||||
if (OVL_TYPE_UPPER(type))
|
||||
break;
|
||||
|
||||
next = dget(dentry);
|
||||
/* find the topmost dentry not yet copied up */
|
||||
for (;;) {
|
||||
parent = dget_parent(next);
|
||||
|
||||
type = ovl_path_type(parent);
|
||||
if (OVL_TYPE_UPPER(type))
|
||||
break;
|
||||
|
||||
dput(next);
|
||||
next = parent;
|
||||
}
|
||||
|
||||
ovl_path_lower(next, &lowerpath);
|
||||
err = vfs_getattr(&lowerpath, &stat);
|
||||
if (!err)
|
||||
err = ovl_copy_up_one(parent, next, &lowerpath, &stat);
|
||||
|
||||
dput(parent);
|
||||
dput(next);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
969
executer/kernel/mcoverlayfs/linux-4.6.7/dir.c
Normal file
969
executer/kernel/mcoverlayfs/linux-4.6.7/dir.c
Normal file
@ -0,0 +1,969 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/cred.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
|
||||
{
|
||||
int err;
|
||||
|
||||
dget(wdentry);
|
||||
if (d_is_dir(wdentry))
|
||||
err = ovl_do_rmdir(wdir, wdentry);
|
||||
else
|
||||
err = ovl_do_unlink(wdir, wdentry);
|
||||
dput(wdentry);
|
||||
|
||||
if (err) {
|
||||
pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
|
||||
wdentry, err);
|
||||
}
|
||||
}
|
||||
|
||||
struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
|
||||
{
|
||||
struct dentry *temp;
|
||||
char name[20];
|
||||
|
||||
snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry);
|
||||
|
||||
temp = lookup_one_len(name, workdir, strlen(name));
|
||||
if (!IS_ERR(temp) && temp->d_inode) {
|
||||
pr_err("overlayfs: workdir/%s already exists\n", name);
|
||||
dput(temp);
|
||||
temp = ERR_PTR(-EIO);
|
||||
}
|
||||
|
||||
return temp;
|
||||
}
|
||||
|
||||
/* caller holds i_mutex on workdir */
|
||||
static struct dentry *ovl_whiteout(struct dentry *workdir,
|
||||
struct dentry *dentry)
|
||||
{
|
||||
int err;
|
||||
struct dentry *whiteout;
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
|
||||
whiteout = ovl_lookup_temp(workdir, dentry);
|
||||
if (IS_ERR(whiteout))
|
||||
return whiteout;
|
||||
|
||||
err = ovl_do_whiteout(wdir, whiteout);
|
||||
if (err) {
|
||||
dput(whiteout);
|
||||
whiteout = ERR_PTR(err);
|
||||
}
|
||||
|
||||
return whiteout;
|
||||
}
|
||||
|
||||
int ovl_create_real(struct inode *dir, struct dentry *newdentry,
|
||||
struct kstat *stat, const char *link,
|
||||
struct dentry *hardlink, bool debug)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (newdentry->d_inode)
|
||||
return -ESTALE;
|
||||
|
||||
if (hardlink) {
|
||||
err = ovl_do_link(hardlink, dir, newdentry, debug);
|
||||
} else {
|
||||
switch (stat->mode & S_IFMT) {
|
||||
case S_IFREG:
|
||||
err = ovl_do_create(dir, newdentry, stat->mode, debug);
|
||||
break;
|
||||
|
||||
case S_IFDIR:
|
||||
err = ovl_do_mkdir(dir, newdentry, stat->mode, debug);
|
||||
break;
|
||||
|
||||
case S_IFCHR:
|
||||
case S_IFBLK:
|
||||
case S_IFIFO:
|
||||
case S_IFSOCK:
|
||||
err = ovl_do_mknod(dir, newdentry,
|
||||
stat->mode, stat->rdev, debug);
|
||||
break;
|
||||
|
||||
case S_IFLNK:
|
||||
err = ovl_do_symlink(dir, newdentry, link, debug);
|
||||
break;
|
||||
|
||||
default:
|
||||
err = -EPERM;
|
||||
}
|
||||
}
|
||||
if (!err && WARN_ON(!newdentry->d_inode)) {
|
||||
/*
|
||||
* Not quite sure if non-instantiated dentry is legal or not.
|
||||
* VFS doesn't seem to care so check and warn here.
|
||||
*/
|
||||
err = -ENOENT;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_set_opaque(struct dentry *upperdentry)
|
||||
{
|
||||
return ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0);
|
||||
}
|
||||
|
||||
static void ovl_remove_opaque(struct dentry *upperdentry)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = ovl_do_removexattr(upperdentry, OVL_XATTR_OPAQUE);
|
||||
if (err) {
|
||||
pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n",
|
||||
upperdentry->d_name.name, err);
|
||||
}
|
||||
}
|
||||
|
||||
static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat)
|
||||
{
|
||||
int err;
|
||||
enum ovl_path_type type;
|
||||
struct path realpath;
|
||||
|
||||
type = ovl_path_real(dentry, &realpath);
|
||||
err = vfs_getattr(&realpath, stat);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
stat->dev = dentry->d_sb->s_dev;
|
||||
stat->ino = dentry->d_inode->i_ino;
|
||||
|
||||
/*
|
||||
* It's probably not worth it to count subdirs to get the
|
||||
* correct link count. nlink=1 seems to pacify 'find' and
|
||||
* other utilities.
|
||||
*/
|
||||
if (OVL_TYPE_MERGE(type))
|
||||
stat->nlink = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
|
||||
struct kstat *stat, const char *link,
|
||||
struct dentry *hardlink)
|
||||
{
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct dentry *newdentry;
|
||||
int err;
|
||||
|
||||
inode_lock_nested(udir, I_MUTEX_PARENT);
|
||||
newdentry = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out_unlock;
|
||||
err = ovl_create_real(udir, newdentry, stat, link, hardlink, false);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
|
||||
ovl_dentry_version_inc(dentry->d_parent);
|
||||
ovl_dentry_update(dentry, newdentry);
|
||||
ovl_copyattr(newdentry->d_inode, inode);
|
||||
d_instantiate(dentry, inode);
|
||||
newdentry = NULL;
|
||||
out_dput:
|
||||
dput(newdentry);
|
||||
out_unlock:
|
||||
inode_unlock(udir);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_lock_rename_workdir(struct dentry *workdir,
|
||||
struct dentry *upperdir)
|
||||
{
|
||||
/* Workdir should not be the same as upperdir */
|
||||
if (workdir == upperdir)
|
||||
goto err;
|
||||
|
||||
/* Workdir should not be subdir of upperdir and vice versa */
|
||||
if (lock_rename(workdir, upperdir) != NULL)
|
||||
goto err_unlock;
|
||||
|
||||
return 0;
|
||||
|
||||
err_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
err:
|
||||
pr_err("overlayfs: failed to lock workdir+upperdir\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static struct dentry *ovl_clear_empty(struct dentry *dentry,
|
||||
struct list_head *list)
|
||||
{
|
||||
struct dentry *workdir = ovl_workdir(dentry);
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct path upperpath;
|
||||
struct dentry *upper;
|
||||
struct dentry *opaquedir;
|
||||
struct kstat stat;
|
||||
unsigned opt = ovl_get_config_opt(dentry);
|
||||
int err;
|
||||
|
||||
if (WARN_ON(!workdir))
|
||||
return ERR_PTR(-EROFS);
|
||||
|
||||
err = ovl_lock_rename_workdir(workdir, upperdir);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
ovl_path_upper(dentry, &upperpath);
|
||||
err = vfs_getattr(&upperpath, &stat);
|
||||
if (err)
|
||||
goto out_unlock;
|
||||
|
||||
err = -ESTALE;
|
||||
if (!S_ISDIR(stat.mode))
|
||||
goto out_unlock;
|
||||
upper = upperpath.dentry;
|
||||
if (upper->d_parent->d_inode != udir)
|
||||
goto out_unlock;
|
||||
|
||||
opaquedir = ovl_lookup_temp(workdir, dentry);
|
||||
err = PTR_ERR(opaquedir);
|
||||
if (IS_ERR(opaquedir))
|
||||
goto out_unlock;
|
||||
|
||||
err = ovl_create_real(wdir, opaquedir, &stat, NULL, NULL, true);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
|
||||
err = ovl_copy_xattr(upper, opaquedir, opt);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_set_opaque(opaquedir);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
inode_lock(opaquedir->d_inode);
|
||||
err = ovl_set_attr(opaquedir, &stat);
|
||||
inode_unlock(opaquedir->d_inode);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
ovl_cleanup_whiteouts(upper, list);
|
||||
ovl_cleanup(wdir, upper);
|
||||
unlock_rename(workdir, upperdir);
|
||||
|
||||
/* dentry's upper doesn't match now, get rid of it */
|
||||
d_drop(dentry);
|
||||
|
||||
return opaquedir;
|
||||
|
||||
out_cleanup:
|
||||
ovl_cleanup(wdir, opaquedir);
|
||||
out_dput:
|
||||
dput(opaquedir);
|
||||
out_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
out:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
|
||||
{
|
||||
int err;
|
||||
struct dentry *ret = NULL;
|
||||
LIST_HEAD(list);
|
||||
|
||||
err = ovl_check_empty_dir(dentry, &list);
|
||||
if (err)
|
||||
ret = ERR_PTR(err);
|
||||
else {
|
||||
/*
|
||||
* If no upperdentry then skip clearing whiteouts.
|
||||
*
|
||||
* Can race with copy-up, since we don't hold the upperdir
|
||||
* mutex. Doesn't matter, since copy-up can't create a
|
||||
* non-empty directory from an empty one.
|
||||
*/
|
||||
if (ovl_dentry_upper(dentry))
|
||||
ret = ovl_clear_empty(dentry, &list);
|
||||
}
|
||||
|
||||
ovl_cache_free(&list);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
|
||||
struct kstat *stat, const char *link,
|
||||
struct dentry *hardlink)
|
||||
{
|
||||
struct dentry *workdir = ovl_workdir(dentry);
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct dentry *upper;
|
||||
struct dentry *newdentry;
|
||||
int err;
|
||||
|
||||
if (WARN_ON(!workdir))
|
||||
return -EROFS;
|
||||
|
||||
err = ovl_lock_rename_workdir(workdir, upperdir);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
newdentry = ovl_lookup_temp(workdir, dentry);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out_unlock;
|
||||
|
||||
upper = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(upper);
|
||||
if (IS_ERR(upper))
|
||||
goto out_dput;
|
||||
|
||||
err = ovl_create_real(wdir, newdentry, stat, link, hardlink, true);
|
||||
if (err)
|
||||
goto out_dput2;
|
||||
|
||||
if (S_ISDIR(stat->mode)) {
|
||||
err = ovl_set_opaque(newdentry);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_do_rename(wdir, newdentry, udir, upper,
|
||||
RENAME_EXCHANGE);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
ovl_cleanup(wdir, upper);
|
||||
} else {
|
||||
err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
}
|
||||
ovl_dentry_version_inc(dentry->d_parent);
|
||||
ovl_dentry_update(dentry, newdentry);
|
||||
ovl_copyattr(newdentry->d_inode, inode);
|
||||
d_instantiate(dentry, inode);
|
||||
newdentry = NULL;
|
||||
out_dput2:
|
||||
dput(upper);
|
||||
out_dput:
|
||||
dput(newdentry);
|
||||
out_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
out:
|
||||
return err;
|
||||
|
||||
out_cleanup:
|
||||
ovl_cleanup(wdir, newdentry);
|
||||
goto out_dput2;
|
||||
}
|
||||
|
||||
static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev,
|
||||
const char *link, struct dentry *hardlink)
|
||||
{
|
||||
int err;
|
||||
struct inode *inode;
|
||||
struct kstat stat = {
|
||||
.mode = mode,
|
||||
.rdev = rdev,
|
||||
};
|
||||
|
||||
err = -ENOMEM;
|
||||
inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata);
|
||||
if (!inode)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(dentry->d_parent);
|
||||
if (err)
|
||||
goto out_iput;
|
||||
|
||||
if (!ovl_dentry_is_opaque(dentry)) {
|
||||
err = ovl_create_upper(dentry, inode, &stat, link, hardlink);
|
||||
} else {
|
||||
const struct cred *old_cred;
|
||||
struct cred *override_cred;
|
||||
|
||||
err = -ENOMEM;
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
goto out_iput;
|
||||
|
||||
/*
|
||||
* CAP_SYS_ADMIN for setting opaque xattr
|
||||
* CAP_DAC_OVERRIDE for create in workdir, rename
|
||||
* CAP_FOWNER for removing whiteout from sticky dir
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
cap_raise(override_cred->cap_effective, CAP_FOWNER);
|
||||
old_cred = override_creds(override_cred);
|
||||
|
||||
err = ovl_create_over_whiteout(dentry, inode, &stat, link,
|
||||
hardlink);
|
||||
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
}
|
||||
|
||||
if (!err)
|
||||
inode = NULL;
|
||||
out_iput:
|
||||
iput(inode);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
|
||||
const char *link)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (!err) {
|
||||
err = ovl_create_or_link(dentry, mode, rdev, link, NULL);
|
||||
ovl_drop_write(dentry);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
bool excl)
|
||||
{
|
||||
return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
|
||||
}
|
||||
|
||||
static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
|
||||
{
|
||||
return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
|
||||
}
|
||||
|
||||
static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
dev_t rdev)
|
||||
{
|
||||
/* Don't allow creation of "whiteout" on overlay */
|
||||
if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
|
||||
return -EPERM;
|
||||
|
||||
return ovl_create_object(dentry, mode, rdev, NULL);
|
||||
}
|
||||
|
||||
static int ovl_symlink(struct inode *dir, struct dentry *dentry,
|
||||
const char *link)
|
||||
{
|
||||
return ovl_create_object(dentry, S_IFLNK, 0, link);
|
||||
}
|
||||
|
||||
static int ovl_link(struct dentry *old, struct inode *newdir,
|
||||
struct dentry *new)
|
||||
{
|
||||
int err;
|
||||
struct dentry *upper;
|
||||
|
||||
err = ovl_want_write(old);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(old);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
upper = ovl_dentry_upper(old);
|
||||
err = ovl_create_or_link(new, upper->d_inode->i_mode, 0, NULL, upper);
|
||||
|
||||
out_drop_write:
|
||||
ovl_drop_write(old);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
|
||||
{
|
||||
struct dentry *workdir = ovl_workdir(dentry);
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct dentry *whiteout;
|
||||
struct dentry *upper;
|
||||
struct dentry *opaquedir = NULL;
|
||||
int err;
|
||||
int flags = 0;
|
||||
|
||||
if (WARN_ON(!workdir))
|
||||
return -EROFS;
|
||||
|
||||
if (is_dir) {
|
||||
if (OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) {
|
||||
opaquedir = ovl_check_empty_and_clear(dentry);
|
||||
err = PTR_ERR(opaquedir);
|
||||
if (IS_ERR(opaquedir))
|
||||
goto out;
|
||||
} else {
|
||||
LIST_HEAD(list);
|
||||
|
||||
/*
|
||||
* When removing an empty opaque directory, then it
|
||||
* makes no sense to replace it with an exact replica of
|
||||
* itself. But emptiness still needs to be checked.
|
||||
*/
|
||||
err = ovl_check_empty_dir(dentry, &list);
|
||||
ovl_cache_free(&list);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
err = ovl_lock_rename_workdir(workdir, upperdir);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
|
||||
upper = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(upper);
|
||||
if (IS_ERR(upper))
|
||||
goto out_unlock;
|
||||
|
||||
err = -ESTALE;
|
||||
if ((opaquedir && upper != opaquedir) ||
|
||||
(!opaquedir && ovl_dentry_upper(dentry) &&
|
||||
upper != ovl_dentry_upper(dentry))) {
|
||||
goto out_dput_upper;
|
||||
}
|
||||
|
||||
whiteout = ovl_whiteout(workdir, dentry);
|
||||
err = PTR_ERR(whiteout);
|
||||
if (IS_ERR(whiteout))
|
||||
goto out_dput_upper;
|
||||
|
||||
if (d_is_dir(upper))
|
||||
flags = RENAME_EXCHANGE;
|
||||
|
||||
err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
|
||||
if (err)
|
||||
goto kill_whiteout;
|
||||
if (flags)
|
||||
ovl_cleanup(wdir, upper);
|
||||
|
||||
ovl_dentry_version_inc(dentry->d_parent);
|
||||
out_d_drop:
|
||||
d_drop(dentry);
|
||||
dput(whiteout);
|
||||
out_dput_upper:
|
||||
dput(upper);
|
||||
out_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
out_dput:
|
||||
dput(opaquedir);
|
||||
out:
|
||||
return err;
|
||||
|
||||
kill_whiteout:
|
||||
ovl_cleanup(wdir, whiteout);
|
||||
goto out_d_drop;
|
||||
}
|
||||
|
||||
static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
|
||||
{
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *dir = upperdir->d_inode;
|
||||
struct dentry *upper;
|
||||
int err;
|
||||
|
||||
inode_lock_nested(dir, I_MUTEX_PARENT);
|
||||
upper = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(upper);
|
||||
if (IS_ERR(upper))
|
||||
goto out_unlock;
|
||||
|
||||
err = -ESTALE;
|
||||
if (upper == ovl_dentry_upper(dentry)) {
|
||||
if (is_dir)
|
||||
err = vfs_rmdir(dir, upper);
|
||||
else
|
||||
err = vfs_unlink(dir, upper, NULL);
|
||||
ovl_dentry_version_inc(dentry->d_parent);
|
||||
}
|
||||
dput(upper);
|
||||
|
||||
/*
|
||||
* Keeping this dentry hashed would mean having to release
|
||||
* upperpath/lowerpath, which could only be done if we are the
|
||||
* sole user of this dentry. Too tricky... Just unhash for
|
||||
* now.
|
||||
*/
|
||||
if (!err)
|
||||
d_drop(dentry);
|
||||
out_unlock:
|
||||
inode_unlock(dir);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_check_sticky(struct dentry *dentry)
|
||||
{
|
||||
struct inode *dir = ovl_dentry_real(dentry->d_parent)->d_inode;
|
||||
struct inode *inode = ovl_dentry_real(dentry)->d_inode;
|
||||
|
||||
if (check_sticky(dir, inode))
|
||||
return -EPERM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
|
||||
{
|
||||
enum ovl_path_type type;
|
||||
int err;
|
||||
|
||||
err = ovl_check_sticky(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(dentry->d_parent);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
type = ovl_path_type(dentry);
|
||||
if (OVL_TYPE_PURE_UPPER(type)) {
|
||||
err = ovl_remove_upper(dentry, is_dir);
|
||||
} else {
|
||||
const struct cred *old_cred;
|
||||
struct cred *override_cred;
|
||||
|
||||
err = -ENOMEM;
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
goto out_drop_write;
|
||||
|
||||
/*
|
||||
* CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
|
||||
* CAP_DAC_OVERRIDE for create in workdir, rename
|
||||
* CAP_FOWNER for removing whiteout from sticky dir
|
||||
* CAP_FSETID for chmod of opaque dir
|
||||
* CAP_CHOWN for chown of opaque dir
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
cap_raise(override_cred->cap_effective, CAP_FOWNER);
|
||||
cap_raise(override_cred->cap_effective, CAP_FSETID);
|
||||
cap_raise(override_cred->cap_effective, CAP_CHOWN);
|
||||
old_cred = override_creds(override_cred);
|
||||
|
||||
err = ovl_remove_and_whiteout(dentry, is_dir);
|
||||
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
}
|
||||
out_drop_write:
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_unlink(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
return ovl_do_remove(dentry, false);
|
||||
}
|
||||
|
||||
static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
return ovl_do_remove(dentry, true);
|
||||
}
|
||||
|
||||
static int ovl_rename2(struct inode *olddir, struct dentry *old,
|
||||
struct inode *newdir, struct dentry *new,
|
||||
unsigned int flags)
|
||||
{
|
||||
int err;
|
||||
enum ovl_path_type old_type;
|
||||
enum ovl_path_type new_type;
|
||||
struct dentry *old_upperdir;
|
||||
struct dentry *new_upperdir;
|
||||
struct dentry *olddentry;
|
||||
struct dentry *newdentry;
|
||||
struct dentry *trap;
|
||||
bool old_opaque;
|
||||
bool new_opaque;
|
||||
bool cleanup_whiteout = false;
|
||||
bool overwrite = !(flags & RENAME_EXCHANGE);
|
||||
bool is_dir = d_is_dir(old);
|
||||
bool new_is_dir = false;
|
||||
struct dentry *opaquedir = NULL;
|
||||
const struct cred *old_cred = NULL;
|
||||
struct cred *override_cred = NULL;
|
||||
|
||||
err = -EINVAL;
|
||||
if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
|
||||
goto out;
|
||||
|
||||
flags &= ~RENAME_NOREPLACE;
|
||||
|
||||
err = ovl_check_sticky(old);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
/* Don't copy up directory trees */
|
||||
old_type = ovl_path_type(old);
|
||||
err = -EXDEV;
|
||||
if (OVL_TYPE_MERGE_OR_LOWER(old_type) && is_dir)
|
||||
goto out;
|
||||
|
||||
if (new->d_inode) {
|
||||
err = ovl_check_sticky(new);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (d_is_dir(new))
|
||||
new_is_dir = true;
|
||||
|
||||
new_type = ovl_path_type(new);
|
||||
err = -EXDEV;
|
||||
if (!overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir)
|
||||
goto out;
|
||||
|
||||
err = 0;
|
||||
if (!OVL_TYPE_UPPER(new_type) && !OVL_TYPE_UPPER(old_type)) {
|
||||
if (ovl_dentry_lower(old)->d_inode ==
|
||||
ovl_dentry_lower(new)->d_inode)
|
||||
goto out;
|
||||
}
|
||||
if (OVL_TYPE_UPPER(new_type) && OVL_TYPE_UPPER(old_type)) {
|
||||
if (ovl_dentry_upper(old)->d_inode ==
|
||||
ovl_dentry_upper(new)->d_inode)
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
if (ovl_dentry_is_opaque(new))
|
||||
new_type = __OVL_PATH_UPPER;
|
||||
else
|
||||
new_type = __OVL_PATH_UPPER | __OVL_PATH_PURE;
|
||||
}
|
||||
|
||||
err = ovl_want_write(old);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(old);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
err = ovl_copy_up(new->d_parent);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
if (!overwrite) {
|
||||
err = ovl_copy_up(new);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
}
|
||||
|
||||
old_opaque = !OVL_TYPE_PURE_UPPER(old_type);
|
||||
new_opaque = !OVL_TYPE_PURE_UPPER(new_type);
|
||||
|
||||
if (old_opaque || new_opaque) {
|
||||
err = -ENOMEM;
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
goto out_drop_write;
|
||||
|
||||
/*
|
||||
* CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
|
||||
* CAP_DAC_OVERRIDE for create in workdir
|
||||
* CAP_FOWNER for removing whiteout from sticky dir
|
||||
* CAP_FSETID for chmod of opaque dir
|
||||
* CAP_CHOWN for chown of opaque dir
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
cap_raise(override_cred->cap_effective, CAP_FOWNER);
|
||||
cap_raise(override_cred->cap_effective, CAP_FSETID);
|
||||
cap_raise(override_cred->cap_effective, CAP_CHOWN);
|
||||
old_cred = override_creds(override_cred);
|
||||
}
|
||||
|
||||
if (overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir) {
|
||||
opaquedir = ovl_check_empty_and_clear(new);
|
||||
err = PTR_ERR(opaquedir);
|
||||
if (IS_ERR(opaquedir)) {
|
||||
opaquedir = NULL;
|
||||
goto out_revert_creds;
|
||||
}
|
||||
}
|
||||
|
||||
if (overwrite) {
|
||||
if (old_opaque) {
|
||||
if (new->d_inode || !new_opaque) {
|
||||
/* Whiteout source */
|
||||
flags |= RENAME_WHITEOUT;
|
||||
} else {
|
||||
/* Switch whiteouts */
|
||||
flags |= RENAME_EXCHANGE;
|
||||
}
|
||||
} else if (is_dir && !new->d_inode && new_opaque) {
|
||||
flags |= RENAME_EXCHANGE;
|
||||
cleanup_whiteout = true;
|
||||
}
|
||||
}
|
||||
|
||||
old_upperdir = ovl_dentry_upper(old->d_parent);
|
||||
new_upperdir = ovl_dentry_upper(new->d_parent);
|
||||
|
||||
trap = lock_rename(new_upperdir, old_upperdir);
|
||||
|
||||
|
||||
olddentry = lookup_one_len(old->d_name.name, old_upperdir,
|
||||
old->d_name.len);
|
||||
err = PTR_ERR(olddentry);
|
||||
if (IS_ERR(olddentry))
|
||||
goto out_unlock;
|
||||
|
||||
err = -ESTALE;
|
||||
if (olddentry != ovl_dentry_upper(old))
|
||||
goto out_dput_old;
|
||||
|
||||
newdentry = lookup_one_len(new->d_name.name, new_upperdir,
|
||||
new->d_name.len);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out_dput_old;
|
||||
|
||||
err = -ESTALE;
|
||||
if (ovl_dentry_upper(new)) {
|
||||
if (opaquedir) {
|
||||
if (newdentry != opaquedir)
|
||||
goto out_dput;
|
||||
} else {
|
||||
if (newdentry != ovl_dentry_upper(new))
|
||||
goto out_dput;
|
||||
}
|
||||
} else {
|
||||
if (!d_is_negative(newdentry) &&
|
||||
(!new_opaque || !ovl_is_whiteout(newdentry)))
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
if (olddentry == trap)
|
||||
goto out_dput;
|
||||
if (newdentry == trap)
|
||||
goto out_dput;
|
||||
|
||||
if (is_dir && !old_opaque && new_opaque) {
|
||||
err = ovl_set_opaque(olddentry);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
}
|
||||
if (!overwrite && new_is_dir && old_opaque && !new_opaque) {
|
||||
err = ovl_set_opaque(newdentry);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
if (old_opaque || new_opaque) {
|
||||
err = ovl_do_rename(old_upperdir->d_inode, olddentry,
|
||||
new_upperdir->d_inode, newdentry,
|
||||
flags);
|
||||
} else {
|
||||
/* No debug for the plain case */
|
||||
BUG_ON(flags & ~RENAME_EXCHANGE);
|
||||
err = vfs_rename(old_upperdir->d_inode, olddentry,
|
||||
new_upperdir->d_inode, newdentry,
|
||||
NULL, flags);
|
||||
}
|
||||
|
||||
if (err) {
|
||||
if (is_dir && !old_opaque && new_opaque)
|
||||
ovl_remove_opaque(olddentry);
|
||||
if (!overwrite && new_is_dir && old_opaque && !new_opaque)
|
||||
ovl_remove_opaque(newdentry);
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
if (is_dir && old_opaque && !new_opaque)
|
||||
ovl_remove_opaque(olddentry);
|
||||
if (!overwrite && new_is_dir && !old_opaque && new_opaque)
|
||||
ovl_remove_opaque(newdentry);
|
||||
|
||||
/*
|
||||
* Old dentry now lives in different location. Dentries in
|
||||
* lowerstack are stale. We cannot drop them here because
|
||||
* access to them is lockless. This could be only pure upper
|
||||
* or opaque directory - numlower is zero. Or upper non-dir
|
||||
* entry - its pureness is tracked by flag opaque.
|
||||
*/
|
||||
if (old_opaque != new_opaque) {
|
||||
ovl_dentry_set_opaque(old, new_opaque);
|
||||
if (!overwrite)
|
||||
ovl_dentry_set_opaque(new, old_opaque);
|
||||
}
|
||||
|
||||
if (cleanup_whiteout)
|
||||
ovl_cleanup(old_upperdir->d_inode, newdentry);
|
||||
|
||||
ovl_dentry_version_inc(old->d_parent);
|
||||
ovl_dentry_version_inc(new->d_parent);
|
||||
|
||||
out_dput:
|
||||
dput(newdentry);
|
||||
out_dput_old:
|
||||
dput(olddentry);
|
||||
out_unlock:
|
||||
unlock_rename(new_upperdir, old_upperdir);
|
||||
out_revert_creds:
|
||||
if (old_opaque || new_opaque) {
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
}
|
||||
out_drop_write:
|
||||
ovl_drop_write(old);
|
||||
out:
|
||||
dput(opaquedir);
|
||||
return err;
|
||||
}
|
||||
|
||||
const struct inode_operations ovl_dir_inode_operations = {
|
||||
.lookup = ovl_lookup,
|
||||
.mkdir = ovl_mkdir,
|
||||
.symlink = ovl_symlink,
|
||||
.unlink = ovl_unlink,
|
||||
.rmdir = ovl_rmdir,
|
||||
.rename2 = ovl_rename2,
|
||||
.link = ovl_link,
|
||||
.setattr = ovl_setattr,
|
||||
.create = ovl_create,
|
||||
.mknod = ovl_mknod,
|
||||
.permission = ovl_permission,
|
||||
.getattr = ovl_dir_getattr,
|
||||
.setxattr = ovl_setxattr,
|
||||
.getxattr = ovl_getxattr,
|
||||
.listxattr = ovl_listxattr,
|
||||
.removexattr = ovl_removexattr,
|
||||
};
|
||||
494
executer/kernel/mcoverlayfs/linux-4.6.7/inode.c
Normal file
494
executer/kernel/mcoverlayfs/linux-4.6.7/inode.c
Normal file
@ -0,0 +1,494 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/xattr.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
static int ovl_copy_up_truncate(struct dentry *dentry)
|
||||
{
|
||||
int err;
|
||||
struct dentry *parent;
|
||||
struct kstat stat;
|
||||
struct path lowerpath;
|
||||
|
||||
parent = dget_parent(dentry);
|
||||
err = ovl_copy_up(parent);
|
||||
if (err)
|
||||
goto out_dput_parent;
|
||||
|
||||
ovl_path_lower(dentry, &lowerpath);
|
||||
err = vfs_getattr(&lowerpath, &stat);
|
||||
if (err)
|
||||
goto out_dput_parent;
|
||||
|
||||
stat.size = 0;
|
||||
err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat);
|
||||
|
||||
out_dput_parent:
|
||||
dput(parent);
|
||||
return err;
|
||||
}
|
||||
|
||||
int ovl_setattr(struct dentry *dentry, struct iattr *attr)
|
||||
{
|
||||
int err;
|
||||
struct dentry *upperdentry;
|
||||
unsigned opt = ovl_get_config_opt(dentry);
|
||||
|
||||
if (OVL_OPT_NOCOPYUPW(opt)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for permissions before trying to copy-up. This is redundant
|
||||
* since it will be rechecked later by ->setattr() on upper dentry. But
|
||||
* without this, copy-up can be triggered by just about anybody.
|
||||
*
|
||||
* We don't initialize inode->size, which just means that
|
||||
* inode_newsize_ok() will always check against MAX_LFS_FILESIZE and not
|
||||
* check for a swapfile (which this won't be anyway).
|
||||
*/
|
||||
err = inode_change_ok(dentry->d_inode, attr);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (attr->ia_valid & ATTR_SIZE) {
|
||||
struct inode *realinode = d_inode(ovl_dentry_real(dentry));
|
||||
|
||||
err = -ETXTBSY;
|
||||
if (atomic_read(&realinode->i_writecount) < 0)
|
||||
goto out_drop_write;
|
||||
}
|
||||
|
||||
err = ovl_copy_up(dentry);
|
||||
if (!err) {
|
||||
struct inode *winode = NULL;
|
||||
|
||||
upperdentry = ovl_dentry_upper(dentry);
|
||||
|
||||
if (attr->ia_valid & ATTR_SIZE) {
|
||||
winode = d_inode(upperdentry);
|
||||
err = get_write_access(winode);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
}
|
||||
|
||||
if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
|
||||
attr->ia_valid &= ~ATTR_MODE;
|
||||
|
||||
inode_lock(upperdentry->d_inode);
|
||||
err = notify_change(upperdentry, attr, NULL);
|
||||
if (!err)
|
||||
ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
|
||||
inode_unlock(upperdentry->d_inode);
|
||||
|
||||
if (winode)
|
||||
put_write_access(winode);
|
||||
}
|
||||
out_drop_write:
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat)
|
||||
{
|
||||
struct path realpath;
|
||||
|
||||
ovl_path_real(dentry, &realpath);
|
||||
return vfs_getattr(&realpath, stat);
|
||||
}
|
||||
|
||||
int ovl_permission(struct inode *inode, int mask)
|
||||
{
|
||||
struct ovl_entry *oe;
|
||||
struct dentry *alias = NULL;
|
||||
struct inode *realinode;
|
||||
struct dentry *realdentry;
|
||||
bool is_upper;
|
||||
int err;
|
||||
|
||||
if (S_ISDIR(inode->i_mode)) {
|
||||
oe = inode->i_private;
|
||||
} else if (mask & MAY_NOT_BLOCK) {
|
||||
return -ECHILD;
|
||||
} else {
|
||||
/*
|
||||
* For non-directories find an alias and get the info
|
||||
* from there.
|
||||
*/
|
||||
alias = d_find_any_alias(inode);
|
||||
if (WARN_ON(!alias))
|
||||
return -ENOENT;
|
||||
|
||||
oe = alias->d_fsdata;
|
||||
|
||||
ovl_reset_ovl_entry(&oe, alias);
|
||||
}
|
||||
|
||||
realdentry = ovl_entry_real(oe, &is_upper);
|
||||
|
||||
if (ovl_is_default_permissions(inode)) {
|
||||
struct kstat stat;
|
||||
struct path realpath = { .dentry = realdentry };
|
||||
|
||||
if (mask & MAY_NOT_BLOCK)
|
||||
return -ECHILD;
|
||||
|
||||
realpath.mnt = ovl_entry_mnt_real(oe, inode, is_upper);
|
||||
|
||||
err = vfs_getattr(&realpath, &stat);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
|
||||
err = -ESTALE;
|
||||
if ((stat.mode ^ inode->i_mode) & S_IFMT)
|
||||
goto out_dput;
|
||||
|
||||
inode->i_mode = stat.mode;
|
||||
inode->i_uid = stat.uid;
|
||||
inode->i_gid = stat.gid;
|
||||
|
||||
err = generic_permission(inode, mask);
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
/* Careful in RCU walk mode */
|
||||
realinode = ACCESS_ONCE(realdentry->d_inode);
|
||||
if (!realinode) {
|
||||
WARN_ON(!(mask & MAY_NOT_BLOCK));
|
||||
err = -ENOENT;
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
if (mask & MAY_WRITE) {
|
||||
umode_t mode = realinode->i_mode;
|
||||
|
||||
/*
|
||||
* Writes will always be redirected to upper layer, so
|
||||
* ignore lower layer being read-only.
|
||||
*
|
||||
* If the overlay itself is read-only then proceed
|
||||
* with the permission check, don't return EROFS.
|
||||
* This will only happen if this is the lower layer of
|
||||
* another overlayfs.
|
||||
*
|
||||
* If upper fs becomes read-only after the overlay was
|
||||
* constructed return EROFS to prevent modification of
|
||||
* upper layer.
|
||||
*/
|
||||
err = -EROFS;
|
||||
if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) &&
|
||||
(S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
err = __inode_permission(realinode, mask);
|
||||
out_dput:
|
||||
dput(alias);
|
||||
return err;
|
||||
}
|
||||
|
||||
static const char *ovl_get_link(struct dentry *dentry,
|
||||
struct inode *inode,
|
||||
struct delayed_call *done)
|
||||
{
|
||||
struct dentry *realdentry;
|
||||
struct inode *realinode;
|
||||
|
||||
if (!dentry)
|
||||
return ERR_PTR(-ECHILD);
|
||||
|
||||
realdentry = ovl_dentry_real(dentry);
|
||||
realinode = realdentry->d_inode;
|
||||
|
||||
if (WARN_ON(!realinode->i_op->get_link))
|
||||
return ERR_PTR(-EPERM);
|
||||
|
||||
return realinode->i_op->get_link(realdentry, realinode, done);
|
||||
}
|
||||
|
||||
static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
|
||||
{
|
||||
struct path realpath;
|
||||
struct inode *realinode;
|
||||
|
||||
ovl_path_real(dentry, &realpath);
|
||||
realinode = realpath.dentry->d_inode;
|
||||
|
||||
if (!realinode->i_op->readlink)
|
||||
return -EINVAL;
|
||||
|
||||
touch_atime(&realpath);
|
||||
|
||||
return realinode->i_op->readlink(realpath.dentry, buf, bufsiz);
|
||||
}
|
||||
|
||||
|
||||
static bool ovl_is_private_xattr(const char *name)
|
||||
{
|
||||
return strncmp(name, OVL_XATTR_PRE_NAME, OVL_XATTR_PRE_LEN) == 0;
|
||||
}
|
||||
|
||||
int ovl_setxattr(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags)
|
||||
{
|
||||
int err;
|
||||
struct dentry *upperdentry;
|
||||
unsigned opt = ovl_get_config_opt(dentry);
|
||||
|
||||
if (OVL_OPT_NOCOPYUPW(opt)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = -EPERM;
|
||||
if (ovl_is_private_xattr(name))
|
||||
goto out_drop_write;
|
||||
|
||||
err = ovl_copy_up(dentry);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
upperdentry = ovl_dentry_upper(dentry);
|
||||
err = vfs_setxattr(upperdentry, name, value, size, flags);
|
||||
|
||||
out_drop_write:
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static bool ovl_need_xattr_filter(struct dentry *dentry,
|
||||
enum ovl_path_type type)
|
||||
{
|
||||
if ((type & (__OVL_PATH_PURE | __OVL_PATH_UPPER)) == __OVL_PATH_UPPER)
|
||||
return S_ISDIR(dentry->d_inode->i_mode);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
|
||||
void *value, size_t size)
|
||||
{
|
||||
struct path realpath;
|
||||
enum ovl_path_type type = ovl_path_real(dentry, &realpath);
|
||||
|
||||
if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
|
||||
return -ENODATA;
|
||||
|
||||
return vfs_getxattr(realpath.dentry, name, value, size);
|
||||
}
|
||||
|
||||
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
|
||||
{
|
||||
struct path realpath;
|
||||
enum ovl_path_type type = ovl_path_real(dentry, &realpath);
|
||||
ssize_t res;
|
||||
int off;
|
||||
|
||||
res = vfs_listxattr(realpath.dentry, list, size);
|
||||
if (res <= 0 || size == 0)
|
||||
return res;
|
||||
|
||||
if (!ovl_need_xattr_filter(dentry, type))
|
||||
return res;
|
||||
|
||||
/* filter out private xattrs */
|
||||
for (off = 0; off < res;) {
|
||||
char *s = list + off;
|
||||
size_t slen = strlen(s) + 1;
|
||||
|
||||
BUG_ON(off + slen > res);
|
||||
|
||||
if (ovl_is_private_xattr(s)) {
|
||||
res -= slen;
|
||||
memmove(s, s + slen, res - off);
|
||||
} else {
|
||||
off += slen;
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
int ovl_removexattr(struct dentry *dentry, const char *name)
|
||||
{
|
||||
int err;
|
||||
struct path realpath;
|
||||
enum ovl_path_type type = ovl_path_real(dentry, &realpath);
|
||||
unsigned opt = ovl_get_config_opt(dentry);
|
||||
|
||||
if (OVL_OPT_NOCOPYUPW(opt)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = -ENODATA;
|
||||
if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
|
||||
goto out_drop_write;
|
||||
|
||||
if (!OVL_TYPE_UPPER(type)) {
|
||||
err = vfs_getxattr(realpath.dentry, name, NULL, 0);
|
||||
if (err < 0)
|
||||
goto out_drop_write;
|
||||
|
||||
err = ovl_copy_up(dentry);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
ovl_path_upper(dentry, &realpath);
|
||||
}
|
||||
|
||||
err = vfs_removexattr(realpath.dentry, name);
|
||||
out_drop_write:
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
|
||||
struct dentry *realdentry)
|
||||
{
|
||||
if (OVL_TYPE_UPPER(type))
|
||||
return false;
|
||||
|
||||
if (special_file(realdentry->d_inode->i_mode))
|
||||
return false;
|
||||
|
||||
if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags)
|
||||
{
|
||||
int err;
|
||||
struct path realpath;
|
||||
enum ovl_path_type type;
|
||||
unsigned opt = ovl_get_config_opt(dentry);
|
||||
|
||||
if (d_is_dir(dentry))
|
||||
return d_backing_inode(dentry);
|
||||
|
||||
type = ovl_path_real(dentry, &realpath);
|
||||
if (!OVL_OPT_NOCOPYUPW(opt) &&
|
||||
ovl_open_need_copy_up(file_flags, type, realpath.dentry)) {
|
||||
OVL_DEBUG("copyup: realpath.dentry=%pd4, i_ino=%lu\n",
|
||||
realpath.dentry, realpath.dentry->d_inode->i_ino);
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
|
||||
if (file_flags & O_TRUNC)
|
||||
err = ovl_copy_up_truncate(dentry);
|
||||
else
|
||||
err = ovl_copy_up(dentry);
|
||||
ovl_drop_write(dentry);
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
|
||||
ovl_path_upper(dentry, &realpath);
|
||||
}
|
||||
|
||||
if (realpath.dentry->d_flags & DCACHE_OP_SELECT_INODE)
|
||||
return realpath.dentry->d_op->d_select_inode(realpath.dentry, file_flags);
|
||||
|
||||
if (OVL_OPT_NOFSCHECK(opt)) {
|
||||
if (realpath.dentry->d_inode->i_sb->s_magic == SYSFS_MAGIC) {
|
||||
OVL_DEBUG("sysfs: dentry=%pd4, i_ino=%lu\n",
|
||||
dentry, dentry->d_inode->i_ino);
|
||||
OVL_DEBUG("sysfs: realpath.dentry=%pd4, i_ino=%lu\n",
|
||||
realpath.dentry, realpath.dentry->d_inode->i_ino);
|
||||
if (!dentry->d_inode->i_private) {
|
||||
dentry->d_inode->i_private = dentry->d_fsdata;
|
||||
dentry->d_fsdata = realpath.dentry->d_fsdata;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return d_backing_inode(realpath.dentry);
|
||||
}
|
||||
|
||||
static const struct inode_operations ovl_file_inode_operations = {
|
||||
.setattr = ovl_setattr,
|
||||
.permission = ovl_permission,
|
||||
.getattr = ovl_getattr,
|
||||
.setxattr = ovl_setxattr,
|
||||
.getxattr = ovl_getxattr,
|
||||
.listxattr = ovl_listxattr,
|
||||
.removexattr = ovl_removexattr,
|
||||
};
|
||||
|
||||
static const struct inode_operations ovl_symlink_inode_operations = {
|
||||
.setattr = ovl_setattr,
|
||||
.get_link = ovl_get_link,
|
||||
.readlink = ovl_readlink,
|
||||
.getattr = ovl_getattr,
|
||||
.setxattr = ovl_setxattr,
|
||||
.getxattr = ovl_getxattr,
|
||||
.listxattr = ovl_listxattr,
|
||||
.removexattr = ovl_removexattr,
|
||||
};
|
||||
|
||||
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
|
||||
struct ovl_entry *oe)
|
||||
{
|
||||
struct inode *inode;
|
||||
|
||||
inode = new_inode(sb);
|
||||
if (!inode)
|
||||
return NULL;
|
||||
|
||||
inode->i_ino = get_next_ino();
|
||||
inode->i_mode = mode;
|
||||
inode->i_flags |= S_NOATIME | S_NOCMTIME;
|
||||
|
||||
mode &= S_IFMT;
|
||||
switch (mode) {
|
||||
case S_IFDIR:
|
||||
inode->i_private = oe;
|
||||
inode->i_op = &ovl_dir_inode_operations;
|
||||
inode->i_fop = &ovl_dir_operations;
|
||||
break;
|
||||
|
||||
case S_IFLNK:
|
||||
inode->i_op = &ovl_symlink_inode_operations;
|
||||
break;
|
||||
|
||||
case S_IFREG:
|
||||
case S_IFSOCK:
|
||||
case S_IFBLK:
|
||||
case S_IFCHR:
|
||||
case S_IFIFO:
|
||||
inode->i_op = &ovl_file_inode_operations;
|
||||
break;
|
||||
|
||||
default:
|
||||
WARN(1, "illegal file type: %i\n", mode);
|
||||
iput(inode);
|
||||
inode = NULL;
|
||||
}
|
||||
|
||||
return inode;
|
||||
}
|
||||
222
executer/kernel/mcoverlayfs/linux-4.6.7/overlayfs.h
Normal file
222
executer/kernel/mcoverlayfs/linux-4.6.7/overlayfs.h
Normal file
@ -0,0 +1,222 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
|
||||
//#define DEBUG
|
||||
#ifdef DEBUG
|
||||
#define OVL_DEBUG(format, ...) pr_err("[DEBUG] %s(): " format, __FUNCTION__, ##__VA_ARGS__)
|
||||
#else
|
||||
#define OVL_DEBUG(format, ...) {}
|
||||
#endif
|
||||
|
||||
struct ovl_entry;
|
||||
|
||||
enum ovl_path_type {
|
||||
__OVL_PATH_PURE = (1 << 0),
|
||||
__OVL_PATH_UPPER = (1 << 1),
|
||||
__OVL_PATH_MERGE = (1 << 2),
|
||||
};
|
||||
|
||||
#define OVL_TYPE_UPPER(type) ((type) & __OVL_PATH_UPPER)
|
||||
#define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE)
|
||||
#define OVL_TYPE_PURE_UPPER(type) ((type) & __OVL_PATH_PURE)
|
||||
#define OVL_TYPE_MERGE_OR_LOWER(type) \
|
||||
(OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type))
|
||||
|
||||
#define OVL_XATTR_PRE_NAME "trusted.overlay."
|
||||
#define OVL_XATTR_PRE_LEN 16
|
||||
#define OVL_XATTR_OPAQUE OVL_XATTR_PRE_NAME"opaque"
|
||||
|
||||
enum ovl_opt_bit {
|
||||
__OVL_OPT_DEFAULT = 0,
|
||||
__OVL_OPT_NOCOPYUPW = (1 << 0),
|
||||
__OVL_OPT_NOFSCHECK = (1 << 1),
|
||||
};
|
||||
|
||||
#define OVL_OPT_NOCOPYUPW(opt) ((opt) & __OVL_OPT_NOCOPYUPW)
|
||||
#define OVL_OPT_NOFSCHECK(opt) ((opt) & __OVL_OPT_NOFSCHECK)
|
||||
|
||||
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
int err = vfs_rmdir(dir, dentry);
|
||||
pr_debug("rmdir(%pd2) = %i\n", dentry, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
int err = vfs_unlink(dir, dentry, NULL);
|
||||
pr_debug("unlink(%pd2) = %i\n", dentry, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
|
||||
struct dentry *new_dentry, bool debug)
|
||||
{
|
||||
int err = vfs_link(old_dentry, dir, new_dentry, NULL);
|
||||
if (debug) {
|
||||
pr_debug("link(%pd2, %pd2) = %i\n",
|
||||
old_dentry, new_dentry, err);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
|
||||
umode_t mode, bool debug)
|
||||
{
|
||||
int err = vfs_create(dir, dentry, mode, true);
|
||||
if (debug)
|
||||
pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
|
||||
umode_t mode, bool debug)
|
||||
{
|
||||
int err = vfs_mkdir(dir, dentry, mode);
|
||||
if (debug)
|
||||
pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
|
||||
umode_t mode, dev_t dev, bool debug)
|
||||
{
|
||||
int err = vfs_mknod(dir, dentry, mode, dev);
|
||||
if (debug) {
|
||||
pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n",
|
||||
dentry, mode, dev, err);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
|
||||
const char *oldname, bool debug)
|
||||
{
|
||||
int err = vfs_symlink(dir, dentry, oldname);
|
||||
if (debug)
|
||||
pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags)
|
||||
{
|
||||
int err = vfs_setxattr(dentry, name, value, size, flags);
|
||||
pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n",
|
||||
dentry, name, (int) size, (char *) value, flags, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
|
||||
{
|
||||
int err = vfs_removexattr(dentry, name);
|
||||
pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
|
||||
struct inode *newdir, struct dentry *newdentry,
|
||||
unsigned int flags)
|
||||
{
|
||||
int err;
|
||||
|
||||
pr_debug("rename2(%pd2, %pd2, 0x%x)\n",
|
||||
olddentry, newdentry, flags);
|
||||
|
||||
err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);
|
||||
|
||||
if (err) {
|
||||
pr_debug("...rename2(%pd2, %pd2, ...) = %i\n",
|
||||
olddentry, newdentry, err);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
int err = vfs_whiteout(dir, dentry);
|
||||
pr_debug("whiteout(%pd2) = %i\n", dentry, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
unsigned ovl_get_config_opt(struct dentry *dentry);
|
||||
void ovl_reset_ovl_entry(struct ovl_entry **oe, struct dentry *dentry);
|
||||
enum ovl_path_type ovl_path_type(struct dentry *dentry);
|
||||
u64 ovl_dentry_version_get(struct dentry *dentry);
|
||||
void ovl_dentry_version_inc(struct dentry *dentry);
|
||||
void ovl_path_upper(struct dentry *dentry, struct path *path);
|
||||
void ovl_path_lower(struct dentry *dentry, struct path *path);
|
||||
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
|
||||
int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
|
||||
struct dentry *ovl_dentry_upper(struct dentry *dentry);
|
||||
struct dentry *ovl_dentry_lower(struct dentry *dentry);
|
||||
struct dentry *ovl_dentry_real(struct dentry *dentry);
|
||||
struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper);
|
||||
struct vfsmount *ovl_entry_mnt_real(struct ovl_entry *oe, struct inode *inode,
|
||||
bool is_upper);
|
||||
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
|
||||
bool ovl_is_default_permissions(struct inode *inode);
|
||||
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
|
||||
struct dentry *ovl_workdir(struct dentry *dentry);
|
||||
int ovl_want_write(struct dentry *dentry);
|
||||
void ovl_drop_write(struct dentry *dentry);
|
||||
bool ovl_dentry_is_opaque(struct dentry *dentry);
|
||||
void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque);
|
||||
bool ovl_is_whiteout(struct dentry *dentry);
|
||||
void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
|
||||
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
|
||||
unsigned int flags);
|
||||
struct file *ovl_path_open(struct path *path, int flags);
|
||||
|
||||
struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry,
|
||||
struct kstat *stat, const char *link);
|
||||
|
||||
/* readdir.c */
|
||||
extern const struct file_operations ovl_dir_operations;
|
||||
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
|
||||
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
|
||||
void ovl_cache_free(struct list_head *list);
|
||||
int ovl_check_d_type_supported(struct path *realpath);
|
||||
|
||||
/* inode.c */
|
||||
int ovl_setattr(struct dentry *dentry, struct iattr *attr);
|
||||
int ovl_permission(struct inode *inode, int mask);
|
||||
int ovl_setxattr(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags);
|
||||
ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
|
||||
void *value, size_t size);
|
||||
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
|
||||
int ovl_removexattr(struct dentry *dentry, const char *name);
|
||||
struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags);
|
||||
|
||||
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
|
||||
struct ovl_entry *oe);
|
||||
static inline void ovl_copyattr(struct inode *from, struct inode *to)
|
||||
{
|
||||
to->i_uid = from->i_uid;
|
||||
to->i_gid = from->i_gid;
|
||||
to->i_mode = from->i_mode;
|
||||
}
|
||||
|
||||
/* dir.c */
|
||||
extern const struct inode_operations ovl_dir_inode_operations;
|
||||
struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry);
|
||||
int ovl_create_real(struct inode *dir, struct dentry *newdentry,
|
||||
struct kstat *stat, const char *link,
|
||||
struct dentry *hardlink, bool debug);
|
||||
void ovl_cleanup(struct inode *dir, struct dentry *dentry);
|
||||
|
||||
/* copy_up.c */
|
||||
int ovl_copy_up(struct dentry *dentry);
|
||||
int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
|
||||
struct path *lowerpath, struct kstat *stat);
|
||||
int ovl_copy_xattr(struct dentry *old, struct dentry *new, unsigned opt);
|
||||
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
|
||||
616
executer/kernel/mcoverlayfs/linux-4.6.7/readdir.c
Normal file
616
executer/kernel/mcoverlayfs/linux-4.6.7/readdir.c
Normal file
@ -0,0 +1,616 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/cred.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
struct ovl_cache_entry {
|
||||
unsigned int len;
|
||||
unsigned int type;
|
||||
u64 ino;
|
||||
struct list_head l_node;
|
||||
struct rb_node node;
|
||||
struct ovl_cache_entry *next_maybe_whiteout;
|
||||
bool is_whiteout;
|
||||
char name[];
|
||||
};
|
||||
|
||||
struct ovl_dir_cache {
|
||||
long refcount;
|
||||
u64 version;
|
||||
struct list_head entries;
|
||||
};
|
||||
|
||||
struct ovl_readdir_data {
|
||||
struct dir_context ctx;
|
||||
bool is_lowest;
|
||||
struct rb_root root;
|
||||
struct list_head *list;
|
||||
struct list_head middle;
|
||||
struct ovl_cache_entry *first_maybe_whiteout;
|
||||
int count;
|
||||
int err;
|
||||
bool d_type_supported;
|
||||
};
|
||||
|
||||
struct ovl_dir_file {
|
||||
bool is_real;
|
||||
bool is_upper;
|
||||
struct ovl_dir_cache *cache;
|
||||
struct list_head *cursor;
|
||||
struct file *realfile;
|
||||
struct file *upperfile;
|
||||
};
|
||||
|
||||
static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
|
||||
{
|
||||
return container_of(n, struct ovl_cache_entry, node);
|
||||
}
|
||||
|
||||
static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
|
||||
const char *name, int len)
|
||||
{
|
||||
struct rb_node *node = root->rb_node;
|
||||
int cmp;
|
||||
|
||||
while (node) {
|
||||
struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
|
||||
|
||||
cmp = strncmp(name, p->name, len);
|
||||
if (cmp > 0)
|
||||
node = p->node.rb_right;
|
||||
else if (cmp < 0 || len < p->len)
|
||||
node = p->node.rb_left;
|
||||
else
|
||||
return p;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
|
||||
const char *name, int len,
|
||||
u64 ino, unsigned int d_type)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
|
||||
|
||||
p = kmalloc(size, GFP_KERNEL);
|
||||
if (!p)
|
||||
return NULL;
|
||||
|
||||
memcpy(p->name, name, len);
|
||||
p->name[len] = '\0';
|
||||
p->len = len;
|
||||
p->type = d_type;
|
||||
p->ino = ino;
|
||||
p->is_whiteout = false;
|
||||
|
||||
if (d_type == DT_CHR) {
|
||||
p->next_maybe_whiteout = rdd->first_maybe_whiteout;
|
||||
rdd->first_maybe_whiteout = p;
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
|
||||
const char *name, int len, u64 ino,
|
||||
unsigned int d_type)
|
||||
{
|
||||
struct rb_node **newp = &rdd->root.rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
while (*newp) {
|
||||
int cmp;
|
||||
struct ovl_cache_entry *tmp;
|
||||
|
||||
parent = *newp;
|
||||
tmp = ovl_cache_entry_from_node(*newp);
|
||||
cmp = strncmp(name, tmp->name, len);
|
||||
if (cmp > 0)
|
||||
newp = &tmp->node.rb_right;
|
||||
else if (cmp < 0 || len < tmp->len)
|
||||
newp = &tmp->node.rb_left;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
|
||||
if (p == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
list_add_tail(&p->l_node, rdd->list);
|
||||
rb_link_node(&p->node, parent, newp);
|
||||
rb_insert_color(&p->node, &rdd->root);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
|
||||
const char *name, int namelen,
|
||||
loff_t offset, u64 ino, unsigned int d_type)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
p = ovl_cache_entry_find(&rdd->root, name, namelen);
|
||||
if (p) {
|
||||
list_move_tail(&p->l_node, &rdd->middle);
|
||||
} else {
|
||||
p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
|
||||
if (p == NULL)
|
||||
rdd->err = -ENOMEM;
|
||||
else
|
||||
list_add_tail(&p->l_node, &rdd->middle);
|
||||
}
|
||||
|
||||
return rdd->err;
|
||||
}
|
||||
|
||||
void ovl_cache_free(struct list_head *list)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
struct ovl_cache_entry *n;
|
||||
|
||||
list_for_each_entry_safe(p, n, list, l_node)
|
||||
kfree(p);
|
||||
|
||||
INIT_LIST_HEAD(list);
|
||||
}
|
||||
|
||||
static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
|
||||
{
|
||||
struct ovl_dir_cache *cache = od->cache;
|
||||
|
||||
WARN_ON(cache->refcount <= 0);
|
||||
cache->refcount--;
|
||||
if (!cache->refcount) {
|
||||
if (ovl_dir_cache(dentry) == cache)
|
||||
ovl_set_dir_cache(dentry, NULL);
|
||||
|
||||
ovl_cache_free(&cache->entries);
|
||||
kfree(cache);
|
||||
}
|
||||
}
|
||||
|
||||
static int ovl_fill_merge(struct dir_context *ctx, const char *name,
|
||||
int namelen, loff_t offset, u64 ino,
|
||||
unsigned int d_type)
|
||||
{
|
||||
struct ovl_readdir_data *rdd =
|
||||
container_of(ctx, struct ovl_readdir_data, ctx);
|
||||
|
||||
rdd->count++;
|
||||
if (!rdd->is_lowest)
|
||||
return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
|
||||
else
|
||||
return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
|
||||
}
|
||||
|
||||
static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
|
||||
{
|
||||
int err;
|
||||
struct ovl_cache_entry *p;
|
||||
struct dentry *dentry;
|
||||
const struct cred *old_cred;
|
||||
struct cred *override_cred;
|
||||
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* CAP_DAC_OVERRIDE for lookup
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
old_cred = override_creds(override_cred);
|
||||
|
||||
err = mutex_lock_killable(&dir->d_inode->i_mutex);
|
||||
if (!err) {
|
||||
while (rdd->first_maybe_whiteout) {
|
||||
p = rdd->first_maybe_whiteout;
|
||||
rdd->first_maybe_whiteout = p->next_maybe_whiteout;
|
||||
dentry = lookup_one_len(p->name, dir, p->len);
|
||||
if (!IS_ERR(dentry)) {
|
||||
p->is_whiteout = ovl_is_whiteout(dentry);
|
||||
dput(dentry);
|
||||
}
|
||||
}
|
||||
inode_unlock(dir->d_inode);
|
||||
}
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_dir_read(struct path *realpath,
|
||||
struct ovl_readdir_data *rdd)
|
||||
{
|
||||
struct file *realfile;
|
||||
int err;
|
||||
|
||||
realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
|
||||
if (IS_ERR(realfile))
|
||||
return PTR_ERR(realfile);
|
||||
|
||||
rdd->first_maybe_whiteout = NULL;
|
||||
rdd->ctx.pos = 0;
|
||||
do {
|
||||
rdd->count = 0;
|
||||
rdd->err = 0;
|
||||
err = iterate_dir(realfile, &rdd->ctx);
|
||||
if (err >= 0)
|
||||
err = rdd->err;
|
||||
} while (!err && rdd->count);
|
||||
|
||||
if (!err && rdd->first_maybe_whiteout)
|
||||
err = ovl_check_whiteouts(realpath->dentry, rdd);
|
||||
|
||||
fput(realfile);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void ovl_dir_reset(struct file *file)
|
||||
{
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
struct ovl_dir_cache *cache = od->cache;
|
||||
struct dentry *dentry = file->f_path.dentry;
|
||||
enum ovl_path_type type = ovl_path_type(dentry);
|
||||
|
||||
if (cache && ovl_dentry_version_get(dentry) != cache->version) {
|
||||
ovl_cache_put(od, dentry);
|
||||
od->cache = NULL;
|
||||
od->cursor = NULL;
|
||||
}
|
||||
WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type));
|
||||
if (od->is_real && OVL_TYPE_MERGE(type))
|
||||
od->is_real = false;
|
||||
}
|
||||
|
||||
static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
|
||||
{
|
||||
int err;
|
||||
struct path realpath;
|
||||
struct ovl_readdir_data rdd = {
|
||||
.ctx.actor = ovl_fill_merge,
|
||||
.list = list,
|
||||
.root = RB_ROOT,
|
||||
.is_lowest = false,
|
||||
};
|
||||
int idx, next;
|
||||
|
||||
for (idx = 0; idx != -1; idx = next) {
|
||||
next = ovl_path_next(idx, dentry, &realpath);
|
||||
|
||||
if (next != -1) {
|
||||
err = ovl_dir_read(&realpath, &rdd);
|
||||
if (err)
|
||||
break;
|
||||
} else {
|
||||
/*
|
||||
* Insert lowest layer entries before upper ones, this
|
||||
* allows offsets to be reasonably constant
|
||||
*/
|
||||
list_add(&rdd.middle, rdd.list);
|
||||
rdd.is_lowest = true;
|
||||
err = ovl_dir_read(&realpath, &rdd);
|
||||
list_del(&rdd.middle);
|
||||
}
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
|
||||
{
|
||||
struct list_head *p;
|
||||
loff_t off = 0;
|
||||
|
||||
list_for_each(p, &od->cache->entries) {
|
||||
if (off >= pos)
|
||||
break;
|
||||
off++;
|
||||
}
|
||||
/* Cursor is safe since the cache is stable */
|
||||
od->cursor = p;
|
||||
}
|
||||
|
||||
static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
|
||||
{
|
||||
int res;
|
||||
struct ovl_dir_cache *cache;
|
||||
|
||||
cache = ovl_dir_cache(dentry);
|
||||
if (cache && ovl_dentry_version_get(dentry) == cache->version) {
|
||||
cache->refcount++;
|
||||
return cache;
|
||||
}
|
||||
ovl_set_dir_cache(dentry, NULL);
|
||||
|
||||
cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
|
||||
if (!cache)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
cache->refcount = 1;
|
||||
INIT_LIST_HEAD(&cache->entries);
|
||||
|
||||
res = ovl_dir_read_merged(dentry, &cache->entries);
|
||||
if (res) {
|
||||
ovl_cache_free(&cache->entries);
|
||||
kfree(cache);
|
||||
return ERR_PTR(res);
|
||||
}
|
||||
|
||||
cache->version = ovl_dentry_version_get(dentry);
|
||||
ovl_set_dir_cache(dentry, cache);
|
||||
|
||||
return cache;
|
||||
}
|
||||
|
||||
static int ovl_iterate(struct file *file, struct dir_context *ctx)
|
||||
{
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
struct dentry *dentry = file->f_path.dentry;
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
if (!ctx->pos)
|
||||
ovl_dir_reset(file);
|
||||
|
||||
if (od->is_real)
|
||||
return iterate_dir(od->realfile, ctx);
|
||||
|
||||
if (!od->cache) {
|
||||
struct ovl_dir_cache *cache;
|
||||
|
||||
cache = ovl_cache_get(dentry);
|
||||
if (IS_ERR(cache))
|
||||
return PTR_ERR(cache);
|
||||
|
||||
od->cache = cache;
|
||||
ovl_seek_cursor(od, ctx->pos);
|
||||
}
|
||||
|
||||
while (od->cursor != &od->cache->entries) {
|
||||
p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
|
||||
if (!p->is_whiteout)
|
||||
if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
|
||||
break;
|
||||
od->cursor = p->l_node.next;
|
||||
ctx->pos++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
|
||||
{
|
||||
loff_t res;
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
|
||||
inode_lock(file_inode(file));
|
||||
if (!file->f_pos)
|
||||
ovl_dir_reset(file);
|
||||
|
||||
if (od->is_real) {
|
||||
res = vfs_llseek(od->realfile, offset, origin);
|
||||
file->f_pos = od->realfile->f_pos;
|
||||
} else {
|
||||
res = -EINVAL;
|
||||
|
||||
switch (origin) {
|
||||
case SEEK_CUR:
|
||||
offset += file->f_pos;
|
||||
break;
|
||||
case SEEK_SET:
|
||||
break;
|
||||
default:
|
||||
goto out_unlock;
|
||||
}
|
||||
if (offset < 0)
|
||||
goto out_unlock;
|
||||
|
||||
if (offset != file->f_pos) {
|
||||
file->f_pos = offset;
|
||||
if (od->cache)
|
||||
ovl_seek_cursor(od, offset);
|
||||
}
|
||||
res = offset;
|
||||
}
|
||||
out_unlock:
|
||||
inode_unlock(file_inode(file));
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
|
||||
int datasync)
|
||||
{
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
struct dentry *dentry = file->f_path.dentry;
|
||||
struct file *realfile = od->realfile;
|
||||
|
||||
/*
|
||||
* Need to check if we started out being a lower dir, but got copied up
|
||||
*/
|
||||
if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) {
|
||||
struct inode *inode = file_inode(file);
|
||||
|
||||
realfile = lockless_dereference(od->upperfile);
|
||||
if (!realfile) {
|
||||
struct path upperpath;
|
||||
|
||||
ovl_path_upper(dentry, &upperpath);
|
||||
realfile = ovl_path_open(&upperpath, O_RDONLY);
|
||||
smp_mb__before_spinlock();
|
||||
inode_lock(inode);
|
||||
if (!od->upperfile) {
|
||||
if (IS_ERR(realfile)) {
|
||||
inode_unlock(inode);
|
||||
return PTR_ERR(realfile);
|
||||
}
|
||||
od->upperfile = realfile;
|
||||
} else {
|
||||
/* somebody has beaten us to it */
|
||||
if (!IS_ERR(realfile))
|
||||
fput(realfile);
|
||||
realfile = od->upperfile;
|
||||
}
|
||||
inode_unlock(inode);
|
||||
}
|
||||
}
|
||||
|
||||
return vfs_fsync_range(realfile, start, end, datasync);
|
||||
}
|
||||
|
||||
static int ovl_dir_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
|
||||
if (od->cache) {
|
||||
inode_lock(inode);
|
||||
ovl_cache_put(od, file->f_path.dentry);
|
||||
inode_unlock(inode);
|
||||
}
|
||||
fput(od->realfile);
|
||||
if (od->upperfile)
|
||||
fput(od->upperfile);
|
||||
kfree(od);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_dir_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct path realpath;
|
||||
struct file *realfile;
|
||||
struct ovl_dir_file *od;
|
||||
enum ovl_path_type type;
|
||||
|
||||
od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
|
||||
if (!od)
|
||||
return -ENOMEM;
|
||||
|
||||
type = ovl_path_real(file->f_path.dentry, &realpath);
|
||||
realfile = ovl_path_open(&realpath, file->f_flags);
|
||||
if (IS_ERR(realfile)) {
|
||||
kfree(od);
|
||||
return PTR_ERR(realfile);
|
||||
}
|
||||
od->realfile = realfile;
|
||||
od->is_real = !OVL_TYPE_MERGE(type);
|
||||
od->is_upper = OVL_TYPE_UPPER(type);
|
||||
file->private_data = od;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct file_operations ovl_dir_operations = {
|
||||
.read = generic_read_dir,
|
||||
.open = ovl_dir_open,
|
||||
.iterate = ovl_iterate,
|
||||
.llseek = ovl_dir_llseek,
|
||||
.fsync = ovl_dir_fsync,
|
||||
.release = ovl_dir_release,
|
||||
};
|
||||
|
||||
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
|
||||
{
|
||||
int err;
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
err = ovl_dir_read_merged(dentry, list);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = 0;
|
||||
|
||||
list_for_each_entry(p, list, l_node) {
|
||||
if (p->is_whiteout)
|
||||
continue;
|
||||
|
||||
if (p->name[0] == '.') {
|
||||
if (p->len == 1)
|
||||
continue;
|
||||
if (p->len == 2 && p->name[1] == '.')
|
||||
continue;
|
||||
}
|
||||
err = -ENOTEMPTY;
|
||||
break;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
|
||||
list_for_each_entry(p, list, l_node) {
|
||||
struct dentry *dentry;
|
||||
|
||||
if (!p->is_whiteout)
|
||||
continue;
|
||||
|
||||
dentry = lookup_one_len(p->name, upper, p->len);
|
||||
if (IS_ERR(dentry)) {
|
||||
pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
|
||||
upper->d_name.name, p->len, p->name,
|
||||
(int) PTR_ERR(dentry));
|
||||
continue;
|
||||
}
|
||||
if (dentry->d_inode)
|
||||
ovl_cleanup(upper->d_inode, dentry);
|
||||
dput(dentry);
|
||||
}
|
||||
inode_unlock(upper->d_inode);
|
||||
}
|
||||
|
||||
static int ovl_check_d_type(struct dir_context *ctx, const char *name,
|
||||
int namelen, loff_t offset, u64 ino,
|
||||
unsigned int d_type)
|
||||
{
|
||||
struct ovl_readdir_data *rdd =
|
||||
container_of(ctx, struct ovl_readdir_data, ctx);
|
||||
|
||||
/* Even if d_type is not supported, DT_DIR is returned for . and .. */
|
||||
if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
|
||||
return 0;
|
||||
|
||||
if (d_type != DT_UNKNOWN)
|
||||
rdd->d_type_supported = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
|
||||
* if error is encountered.
|
||||
*/
|
||||
int ovl_check_d_type_supported(struct path *realpath)
|
||||
{
|
||||
int err;
|
||||
struct ovl_readdir_data rdd = {
|
||||
.ctx.actor = ovl_check_d_type,
|
||||
.d_type_supported = false,
|
||||
};
|
||||
|
||||
err = ovl_dir_read(realpath, &rdd);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return rdd.d_type_supported;
|
||||
}
|
||||
1298
executer/kernel/mcoverlayfs/linux-4.6.7/super.c
Normal file
1298
executer/kernel/mcoverlayfs/linux-4.6.7/super.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,411 +0,0 @@
|
||||
/**
|
||||
* \file procfs.c
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* mcctrl procfs
|
||||
* \author Naoki Hamada <nao@axe.bz> \par
|
||||
* Copyright (C) 2014 AXE, Inc.
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#include <linux/string.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/fs.h>
|
||||
#include "mcctrl.h"
|
||||
|
||||
//#define PROCFS_DEBUG
|
||||
|
||||
#ifdef PROCFS_DEBUG
|
||||
#define dprintk(...) printk(__VA_ARGS__)
|
||||
#else
|
||||
#define dprintk(...)
|
||||
#endif
|
||||
|
||||
static DECLARE_WAIT_QUEUE_HEAD(procfsq);
|
||||
|
||||
int mckernel_procfs_read(char *buffer, char **start, off_t offset,
|
||||
int count, int *peof, void *dat);
|
||||
|
||||
/* A private data for the procfs driver. */
|
||||
|
||||
struct procfs_list_entry {
|
||||
struct list_head list;
|
||||
struct proc_dir_entry *entry;
|
||||
struct proc_dir_entry *parent;
|
||||
ihk_os_t os;
|
||||
int osnum;
|
||||
int pid;
|
||||
int cpu;
|
||||
char fname[PROCFS_NAME_MAX];
|
||||
};
|
||||
|
||||
/*
|
||||
* In the procfs_file_list, mckenrel procfs files are
|
||||
* listed in the manner that the leaf file is located
|
||||
* always nearer to the list top than its parent node
|
||||
* file.
|
||||
*/
|
||||
|
||||
LIST_HEAD(procfs_file_list);
|
||||
static ihk_spinlock_t procfs_file_list_lock;
|
||||
|
||||
/**
|
||||
* \brief Return specified procfs entry.
|
||||
*
|
||||
* \param p a name of the procfs file
|
||||
* \param osnum os number
|
||||
* \param mode if zero create a directory otherwise a file
|
||||
*
|
||||
* return value: NULL: Something wrong has occurred.
|
||||
* otherwise: address of the proc_dir_entry structure of the procfs file
|
||||
*
|
||||
* p should not be NULL nor terminated by "/".
|
||||
*
|
||||
* We create a procfs entry if there is not already one.
|
||||
* This process is recursive to the root of the procfs tree.
|
||||
*/
|
||||
/*
|
||||
* XXX: Two or more entries which have same name can be created.
|
||||
*
|
||||
* get_procfs_entry() avoids creating an entry which has already been created.
|
||||
* But, it allows creating an entry which is being created by another thread.
|
||||
*
|
||||
* This problem occurred when two requests which created files with a common
|
||||
* ancestor directory which was not explicitly created were racing.
|
||||
*/
|
||||
|
||||
static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
|
||||
{
|
||||
char *r;
|
||||
struct proc_dir_entry *ret = NULL, *parent = NULL;
|
||||
struct procfs_list_entry *e;
|
||||
char name[PROCFS_NAME_MAX];
|
||||
unsigned long irqflags;
|
||||
|
||||
dprintk("get_procfs_entry: %s for osnum %d mode %o\n", p, osnum, mode);
|
||||
irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
|
||||
list_for_each_entry(e, &procfs_file_list, list) {
|
||||
if (e == NULL) {
|
||||
kprintf("ERROR: The procfs_file_list has a null entry.\n");
|
||||
return NULL;
|
||||
}
|
||||
if (strncmp(e->fname, p, PROCFS_NAME_MAX) == 0) {
|
||||
/* We found the entry */
|
||||
ret = e->entry;
|
||||
}
|
||||
}
|
||||
ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);
|
||||
if (ret != NULL) {
|
||||
return ret;
|
||||
}
|
||||
r = strrchr(p, '/');
|
||||
if (r != NULL) {
|
||||
/* We have non-null parent dir. */
|
||||
strncpy(name, p, r - p);
|
||||
name[r - p] = '\0';
|
||||
parent = get_procfs_entry(name, osnum, 0);
|
||||
if (parent == NULL) {
|
||||
/* We counld not get a parent procfs entry. Give up.*/
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
e = kmalloc(sizeof(struct procfs_list_entry), GFP_KERNEL);
|
||||
if (e == NULL) {
|
||||
kprintf("ERROR: not enough memory to create PROCFS entry.\n");
|
||||
return NULL;
|
||||
}
|
||||
/* Fill the fname field of the entry */
|
||||
strncpy(e->fname, p, PROCFS_NAME_MAX);
|
||||
|
||||
if (r != NULL) {
|
||||
strncpy(name, r + 1, p + PROCFS_NAME_MAX - r - 1);
|
||||
} else {
|
||||
strncpy(name, p, PROCFS_NAME_MAX);
|
||||
}
|
||||
if (mode == 0) {
|
||||
ret = proc_mkdir(name, parent);
|
||||
} else {
|
||||
ret = create_proc_entry(name, mode, parent);
|
||||
}
|
||||
if (ret == NULL) {
|
||||
kprintf("ERROR: cannot create a PROCFS entry for %s.\n", p);
|
||||
kfree(e);
|
||||
return NULL;
|
||||
}
|
||||
ret->data = e;
|
||||
e->osnum = osnum;
|
||||
e->entry = ret;
|
||||
e->parent = parent;
|
||||
|
||||
irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
|
||||
list_add(&(e->list), &procfs_file_list);
|
||||
ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);
|
||||
|
||||
dprintk("get_procfs_entry: %s done\n", p);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Create a procfs entry.
|
||||
*
|
||||
* \param __os (opeque) os variable
|
||||
* \param ref cpuid of the requesting mckernel process
|
||||
* \param osnum osnum of the requesting mckernel process
|
||||
* \param pid pid of the requesting mckernel process
|
||||
* \param arg sent argument
|
||||
*/
|
||||
|
||||
void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg)
|
||||
{
|
||||
struct proc_dir_entry *entry;
|
||||
struct procfs_list_entry *e;
|
||||
ihk_device_t dev = ihk_os_to_dev(__os);
|
||||
unsigned long parg;
|
||||
struct procfs_file *f;
|
||||
int mode;
|
||||
char name[PROCFS_NAME_MAX];
|
||||
|
||||
dprintk("procfs_create: osnum: %d, cpu: %d, pid: %d\n", osnum, ref, pid);
|
||||
|
||||
parg = ihk_device_map_memory(dev, arg, sizeof(struct procfs_file));
|
||||
f = ihk_device_map_virtual(dev, parg, sizeof(struct procfs_file), NULL, 0);
|
||||
|
||||
dprintk("name: %s mode: %o\n", f->fname, f->mode);
|
||||
|
||||
strncpy(name, f->fname, PROCFS_NAME_MAX);
|
||||
mode = f->mode;
|
||||
|
||||
if (name[PROCFS_NAME_MAX - 1] != '\0') {
|
||||
printk("ERROR: procfs_creat: file name not properly terminated.\n");
|
||||
goto quit;
|
||||
}
|
||||
entry = get_procfs_entry(name, osnum, mode);
|
||||
if (entry == NULL) {
|
||||
printk("ERROR: could not create a procfs entry for %s.\n", name);
|
||||
goto quit;
|
||||
}
|
||||
|
||||
e = entry->data;
|
||||
e->os = __os;
|
||||
e->cpu = ref;
|
||||
e->pid = pid;
|
||||
|
||||
entry->read_proc = mckernel_procfs_read;
|
||||
quit:
|
||||
f->status = 1; /* Now the peer can free the data. */
|
||||
ihk_device_unmap_virtual(dev, f, sizeof(struct procfs_file));
|
||||
ihk_device_unmap_memory(dev, parg, sizeof(struct procfs_file));
|
||||
dprintk("procfs_create: done\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Delete a procfs entry.
|
||||
*
|
||||
* \param __os (opaque) os variable
|
||||
* \param osnum os number
|
||||
* \param arg sent argument
|
||||
*/
|
||||
|
||||
void procfs_delete(void *__os, int osnum, unsigned long arg)
|
||||
{
|
||||
ihk_device_t dev = ihk_os_to_dev(__os);
|
||||
unsigned long parg;
|
||||
struct procfs_file *f;
|
||||
struct procfs_list_entry *e;
|
||||
struct proc_dir_entry *parent = NULL;
|
||||
char name[PROCFS_NAME_MAX];
|
||||
char *r;
|
||||
unsigned long irqflags;
|
||||
|
||||
dprintk("procfs_delete: \n");
|
||||
parg = ihk_device_map_memory(dev, arg, sizeof(struct procfs_file));
|
||||
f = ihk_device_map_virtual(dev, parg, sizeof(struct procfs_file), NULL, 0);
|
||||
dprintk("fname: %s.\n", f->fname);
|
||||
irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
|
||||
list_for_each_entry(e, &procfs_file_list, list) {
|
||||
if ((strncmp(e->fname, f->fname, PROCFS_NAME_MAX) == 0) &&
|
||||
(e->osnum == osnum)) {
|
||||
list_del(&e->list);
|
||||
e->entry->read_proc = NULL;
|
||||
e->entry->data = NULL;
|
||||
parent = e->parent;
|
||||
kfree(e);
|
||||
r = strrchr(f->fname, '/');
|
||||
if (r == NULL) {
|
||||
strncpy(name, f->fname, PROCFS_NAME_MAX);
|
||||
} else {
|
||||
strncpy(name, r + 1, PROCFS_NAME_MAX);
|
||||
}
|
||||
dprintk("found and remove %s from the list.\n", name);
|
||||
remove_proc_entry(name, parent);
|
||||
break;
|
||||
}
|
||||
}
|
||||
ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);
|
||||
f->status = 1; /* Now the peer can free the data. */
|
||||
ihk_device_unmap_virtual(dev, f, sizeof(struct procfs_file));
|
||||
ihk_device_unmap_memory(dev, parg, sizeof(struct procfs_file));
|
||||
dprintk("procfs_delete: done\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Process SCD_MSG_PROCFS_ANSWER message.
|
||||
*
|
||||
* \param arg sent argument
|
||||
* \param err error info (redundant)
|
||||
*/
|
||||
|
||||
void procfs_answer(unsigned int arg, int err)
|
||||
{
|
||||
dprintk("procfs: received SCD_MSG_PROCFS_ANSWER message(err = %d).\n", err);
|
||||
wake_up_interruptible(&procfsq);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief The callback funciton for McKernel procfs
|
||||
*
|
||||
* This function conforms to the 2) way of fs/proc/generic.c
|
||||
* from linux-2.6.39.4.
|
||||
*/
|
||||
|
||||
int mckernel_procfs_read(char *buffer, char **start, off_t offset,
|
||||
int count, int *peof, void *dat)
|
||||
{
|
||||
struct procfs_list_entry *e = dat;
|
||||
volatile struct procfs_read *r;
|
||||
struct ikc_scd_packet isp;
|
||||
int ret, retrycount = 0;
|
||||
unsigned long pbuf;
|
||||
|
||||
dprintk("mckernel_procfs_read: invoked for %s\n", e->fname);
|
||||
|
||||
if (count <= 0 || dat == NULL || offset < 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
pbuf = virt_to_phys(buffer);
|
||||
if (pbuf / PAGE_SIZE != (pbuf + count - 1) / PAGE_SIZE) {
|
||||
/* Truncate the read count upto the nearest page boundary */
|
||||
count = ((pbuf + count - 1) / PAGE_SIZE) * PAGE_SIZE - pbuf;
|
||||
}
|
||||
r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL);
|
||||
if (r == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
retry:
|
||||
dprintk("offset: %lx, count: %d, cpu: %d\n", offset, count, e->cpu);
|
||||
|
||||
r->pbuf = pbuf;
|
||||
r->eof = 0;
|
||||
r->ret = -EIO; /* default */
|
||||
r->status = 0;
|
||||
r->offset = offset;
|
||||
r->count = count;
|
||||
strncpy((char *)r->fname, e->fname, PROCFS_NAME_MAX);
|
||||
isp.msg = SCD_MSG_PROCFS_REQUEST;
|
||||
isp.ref = e->cpu;
|
||||
isp.arg = virt_to_phys(r);
|
||||
ret = mcctrl_ikc_send(e->os, e->cpu, &isp);
|
||||
if (ret < 0) {
|
||||
goto out; /* error */
|
||||
}
|
||||
/* Wait for a reply. */
|
||||
ret = -EIO; /* default exit code */
|
||||
dprintk("now wait for a relpy\n");
|
||||
/* Wait for the status field of the procfs_read structure set ready. */
|
||||
if (wait_event_interruptible_timeout(procfsq, r->status != 0, HZ) == 0) {
|
||||
kprintf("ERROR: mckernel_procfs_read: timeout (1 sec).\n");
|
||||
goto out;
|
||||
}
|
||||
/* Wake up and check the result. */
|
||||
dprintk("mckernel_procfs_read: woke up. ret: %d, eof: %d\n", r->ret, r->eof);
|
||||
if ((r->ret == 0) && (r->eof != 1)) {
|
||||
/* A miss-hit caused by migration has occurred.
|
||||
* We simply retry the query with a new CPU.
|
||||
*/
|
||||
if (retrycount++ > 10) {
|
||||
kprintf("ERROR: mckernel_procfs_read: excessive retry.\n");
|
||||
goto out;
|
||||
}
|
||||
e->cpu = r->newcpu;
|
||||
dprintk("retry\n");
|
||||
goto retry;
|
||||
}
|
||||
if (r->eof == 1) {
|
||||
dprintk("reached end of file.\n");
|
||||
*peof = 1;
|
||||
}
|
||||
*start = buffer;
|
||||
ret = r->ret;
|
||||
out:
|
||||
kfree((void *)r);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Initialization for procfs
|
||||
*
|
||||
* \param osnum os number
|
||||
*/
|
||||
|
||||
void procfs_init(int osnum) {
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Finalization for procfs
|
||||
*
|
||||
* \param osnum os number
|
||||
*/
|
||||
|
||||
void procfs_exit(int osnum) {
|
||||
char buf[20], *r;
|
||||
int error;
|
||||
mm_segment_t old_fs = get_fs();
|
||||
struct kstat stat;
|
||||
struct proc_dir_entry *parent;
|
||||
struct procfs_list_entry *e, *temp = NULL;
|
||||
unsigned long irqflags;
|
||||
|
||||
dprintk("remove remaining mckernel procfs files.\n");
|
||||
|
||||
irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
|
||||
list_for_each_entry_safe(e, temp, &procfs_file_list, list) {
|
||||
if (e->osnum == osnum) {
|
||||
dprintk("found entry for %s.\n", e->fname);
|
||||
list_del(&e->list);
|
||||
e->entry->read_proc = NULL;
|
||||
e->entry->data = NULL;
|
||||
parent = e->parent;
|
||||
r = strrchr(e->fname, '/');
|
||||
if (r == NULL) {
|
||||
r = e->fname;
|
||||
} else {
|
||||
r += 1;
|
||||
}
|
||||
remove_proc_entry(r, parent);
|
||||
dprintk("free the entry\n");
|
||||
kfree(e);
|
||||
}
|
||||
dprintk("iterate it.\n");
|
||||
}
|
||||
ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);
|
||||
|
||||
sprintf(buf, "/proc/mcos%d", osnum);
|
||||
|
||||
set_fs(KERNEL_DS);
|
||||
error = vfs_stat (buf, &stat);
|
||||
set_fs(old_fs);
|
||||
if (error != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
printk("procfs_exit: We have to remove unexpectedly remaining %s.\n", buf);
|
||||
|
||||
/* remove remnant of previous mcos%d */
|
||||
remove_proc_entry(buf + 6, NULL);
|
||||
}
|
||||
@ -1,13 +1,19 @@
|
||||
CC=@CC@
|
||||
BINDIR=@BINDIR@
|
||||
CFLAGS=-Wall -O -fPIE -pie
|
||||
KDIR ?= @KDIR@
|
||||
CFLAGS=-Wall -O -I.
|
||||
VPATH=@abs_srcdir@
|
||||
TARGET=mcexec
|
||||
@uncomment_if_ENABLE_MEMDUMP@TARGET+=eclair
|
||||
LIBS=@LIBS@
|
||||
|
||||
all: $(TARGET)
|
||||
|
||||
mcexec: mcexec.c
|
||||
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) -pthread -o $@ $^ $(EXTRA_OBJS)
|
||||
$(CC) -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -fPIE -pie -lrt -pthread -o $@ $^ $(EXTRA_OBJS)
|
||||
|
||||
eclair: eclair.c
|
||||
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
clean:
|
||||
$(RM) $(TARGET) *.o
|
||||
@ -17,4 +23,5 @@ clean:
|
||||
install:
|
||||
mkdir -p -m 755 $(BINDIR)
|
||||
install -m 755 mcexec $(BINDIR)
|
||||
@uncomment_if_ENABLE_MEMDUMP@install -m 755 eclair $(BINDIR)
|
||||
|
||||
|
||||
1070
executer/user/eclair.c
Normal file
1070
executer/user/eclair.c
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -3,7 +3,7 @@ OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o
|
||||
OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o
|
||||
DEPSRCS=$(wildcard $(SRC)/*.c)
|
||||
|
||||
CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__
|
||||
CFLAGS += -I$(SRC)/include -D__KERNEL__
|
||||
CFLAGS += -DKNC_MAP_MICPA $(EXTRA_CFLAGS)
|
||||
|
||||
ifeq ("$(DCFA_MODE)", "kmod")
|
||||
|
||||
@ -3,15 +3,15 @@ SRC=$(VPATH)
|
||||
IHKDIR=$(IHKBASE)/$(TARGETDIR)
|
||||
OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o
|
||||
OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o
|
||||
OBJS += zeroobj.o procfs.o devobj.o
|
||||
OBJS += zeroobj.o procfs.o devobj.o sysfs.o
|
||||
DEPSRCS=$(wildcard $(SRC)/*.c)
|
||||
|
||||
CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__
|
||||
CFLAGS += -I$(SRC)/include -D__KERNEL__ -g -fno-omit-frame-pointer -fno-inline -fno-inline-small-functions
|
||||
LDFLAGS += -e arch_start
|
||||
IHKOBJ = ihk/ihk.o
|
||||
|
||||
include $(SRC)/config/config.$(TARGET)
|
||||
include $(IHKBASE)/Makefile.common
|
||||
include @abs_builddir@/../../ihk/cokernel/Makefile.common
|
||||
|
||||
# CFLAGS += -I$(SRC)/../arch/$(IHKARCH)/kernel/include -I$(SRC)/../lib/include
|
||||
|
||||
|
||||
@ -9,7 +9,7 @@ V ?= $(VERBOSE)
|
||||
KERNEL = kernel.img
|
||||
KERNELS = $(addsuffix /$(KERNEL),$(addprefix $(O)/,$(BUILD_TARGET)))
|
||||
|
||||
SUBCMD_OPTS = V='$(V)'
|
||||
SUBCMD_OPTS = V='$(V)' BUILD_IHK_COKERNEL=@abs_builddir@/../../ihk/cokernel
|
||||
|
||||
$(if $(O),,$(error Specify the compilation target directory))
|
||||
#$(if $(shell ls $(IHKBASE)/Makefile),,\
|
||||
|
||||
168
kernel/ap.c
168
kernel/ap.c
@ -24,20 +24,23 @@
|
||||
#include <process.h>
|
||||
#include <init.h>
|
||||
#include <march.h>
|
||||
#include <cls.h>
|
||||
|
||||
int num_processors = 1;
|
||||
static volatile int ap_stop = 1;
|
||||
|
||||
static void ap_wait(void)
|
||||
{
|
||||
wrmsr(MSR_IA32_TIME_STAMP_COUNTER, 0);
|
||||
|
||||
init_tick();
|
||||
while (ap_stop) {
|
||||
barrier();
|
||||
cpu_pause();
|
||||
}
|
||||
sync_tick();
|
||||
|
||||
kmalloc_init();
|
||||
sched_init();
|
||||
arch_start_pvclock();
|
||||
|
||||
if (find_command_line("hidos")) {
|
||||
init_host_syscall_channel();
|
||||
@ -53,19 +56,20 @@ static void ap_wait(void)
|
||||
|
||||
void ap_start(void)
|
||||
{
|
||||
init_tick();
|
||||
ap_stop = 0;
|
||||
sync_tick();
|
||||
}
|
||||
|
||||
void ap_init(void)
|
||||
{
|
||||
struct ihk_mc_cpu_info *cpu_info;
|
||||
int i;
|
||||
int bsp_hw_id;
|
||||
int bsp_hw_id, bsp_cpu_id;
|
||||
|
||||
ihk_mc_init_ap();
|
||||
init_delay();
|
||||
|
||||
wrmsr(MSR_IA32_TIME_STAMP_COUNTER, 0);
|
||||
|
||||
cpu_info = ihk_mc_get_cpu_info();
|
||||
bsp_hw_id = ihk_mc_get_hardware_processor_id();
|
||||
|
||||
@ -74,18 +78,164 @@ void ap_init(void)
|
||||
return;
|
||||
}
|
||||
|
||||
kprintf("BSP HW ID = %d, ", bsp_hw_id);
|
||||
kprintf("AP Booting :");
|
||||
bsp_cpu_id = 0;
|
||||
for (i = 0; i < cpu_info->ncpus; ++i) {
|
||||
if (cpu_info->hw_ids[i] == bsp_hw_id) {
|
||||
bsp_cpu_id = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
kprintf("BSP: %d (HW ID: %d @ NUMA %d)\n", bsp_cpu_id,
|
||||
bsp_hw_id, cpu_info->nodes[0]);
|
||||
|
||||
for (i = 0; i < cpu_info->ncpus; i++) {
|
||||
if (cpu_info->hw_ids[i] == bsp_hw_id) {
|
||||
continue;
|
||||
}
|
||||
kprintf("AP Booting: %d (HW ID: %d @ NUMA %d)\n", i,
|
||||
cpu_info->hw_ids[i], cpu_info->nodes[i]);
|
||||
ihk_mc_boot_cpu(cpu_info->hw_ids[i], (unsigned long)ap_wait);
|
||||
kprintf(" %d", cpu_info->hw_ids[i]);
|
||||
|
||||
num_processors++;
|
||||
}
|
||||
kprintf(" .. Done\n");
|
||||
kprintf("AP Booting: Done\n");
|
||||
}
|
||||
|
||||
#include <sysfs.h>
|
||||
#include <kmalloc.h>
|
||||
#include <string.h>
|
||||
#include <vsprintf.h>
|
||||
|
||||
static ssize_t
|
||||
show_int(struct sysfs_ops *ops, void *instance, void *buf, size_t size)
|
||||
{
|
||||
int *p = instance;
|
||||
|
||||
return snprintf(buf, size, "%d\n", *p);
|
||||
}/* show_int() */
|
||||
|
||||
struct sysfs_ops show_int_ops = {
|
||||
.show = &show_int,
|
||||
};
|
||||
|
||||
struct fake_cpu_info {
|
||||
int online;
|
||||
};
|
||||
|
||||
static struct fake_cpu_info *fake_cpu_infos = NULL;
|
||||
|
||||
enum fake_cpu_info_member {
|
||||
ONLINE,
|
||||
};
|
||||
|
||||
struct fake_cpu_info_ops {
|
||||
enum fake_cpu_info_member member;
|
||||
struct sysfs_ops ops;
|
||||
};
|
||||
|
||||
static ssize_t
|
||||
show_fake_cpu_info(struct sysfs_ops *ops0, void *instance, void *buf,
|
||||
size_t size)
|
||||
{
|
||||
struct fake_cpu_info_ops *ops
|
||||
= container_of(ops0, struct fake_cpu_info_ops, ops);
|
||||
struct fake_cpu_info *info = instance;
|
||||
ssize_t n;
|
||||
|
||||
switch (ops->member) {
|
||||
case ONLINE:
|
||||
n = snprintf(buf, size, "%d\n", info->online);
|
||||
break;
|
||||
default:
|
||||
n = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
if (n >= size) {
|
||||
n = -ENOSPC;
|
||||
}
|
||||
|
||||
return n;
|
||||
} /* show_fake_cpu_info() */
|
||||
|
||||
static ssize_t
|
||||
store_fake_cpu_info(struct sysfs_ops *ops0, void *instance, void *buf,
|
||||
size_t size)
|
||||
{
|
||||
struct fake_cpu_info_ops *ops
|
||||
= container_of(ops0, struct fake_cpu_info_ops, ops);
|
||||
struct fake_cpu_info *info = instance;
|
||||
ssize_t n;
|
||||
|
||||
switch (ops->member) {
|
||||
case ONLINE:
|
||||
kprintf("NYI:store_fake_cpu_info(%p,%p,%p,%ld): "
|
||||
"online %d --> \"%.*s\"\n",
|
||||
ops0, instance, buf, size, info->online,
|
||||
(int)size, buf);
|
||||
n = size;
|
||||
break;
|
||||
default:
|
||||
n = -EIO;
|
||||
break;
|
||||
}
|
||||
|
||||
return n;
|
||||
} /* store_fake_cpu_info() */
|
||||
|
||||
static struct fake_cpu_info_ops show_fci_online = {
|
||||
.member = ONLINE,
|
||||
.ops.show = &show_fake_cpu_info,
|
||||
.ops.store = &store_fake_cpu_info,
|
||||
};
|
||||
|
||||
void
|
||||
cpu_sysfs_setup(void)
|
||||
{
|
||||
int error;
|
||||
int cpu;
|
||||
sysfs_handle_t targeth;
|
||||
struct fake_cpu_info *info;
|
||||
|
||||
/* sample of simple variable **********************************/
|
||||
error = sysfs_createf(&show_int_ops, &num_processors, 0444,
|
||||
"/sys/devices/system/cpu/num_processors");
|
||||
if (error) {
|
||||
panic("cpu_sysfs_setup:sysfs_createf(num_processors) failed\n");
|
||||
}
|
||||
|
||||
/* sample of more complex variable ****************************/
|
||||
/* setup table */
|
||||
info = kmalloc(sizeof(*info) * num_processors, IHK_MC_AP_CRITICAL);
|
||||
for (cpu = 0; cpu < num_processors; ++cpu) {
|
||||
info[cpu].online = 1;
|
||||
}
|
||||
fake_cpu_infos = info;
|
||||
|
||||
/* setup sysfs tree */
|
||||
for (cpu = 0; cpu < num_processors; ++cpu) {
|
||||
/* online */
|
||||
error = sysfs_createf(&show_fci_online.ops,
|
||||
&fake_cpu_infos[cpu], 0644,
|
||||
"/sys/devices/system/cpu/cpu%d/online", cpu);
|
||||
if (error) {
|
||||
panic("cpu_sysfs_setup:sysfs_createf failed\n");
|
||||
}
|
||||
|
||||
/* link to cpu%d */
|
||||
error = sysfs_lookupf(&targeth,
|
||||
"/sys/devices/system/cpu/cpu%d", cpu);
|
||||
if (error) {
|
||||
panic("cpu_sysfs_setup:sysfs_lookupf failed\n");
|
||||
}
|
||||
|
||||
error = sysfs_symlinkf(targeth, "/sys/bus/cpu/devices/cpu%d",
|
||||
cpu);
|
||||
if (error) {
|
||||
panic("cpu_sysfs_setup:sysfs_symlinkf failed\n");
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
} /* cpu_sysfs_setup() */
|
||||
|
||||
16
kernel/cls.c
16
kernel/cls.c
@ -23,6 +23,7 @@
|
||||
extern int num_processors;
|
||||
|
||||
struct cpu_local_var *clv;
|
||||
static int cpu_local_var_initialized = 0;
|
||||
|
||||
void cpu_local_var_init(void)
|
||||
{
|
||||
@ -31,11 +32,24 @@ void cpu_local_var_init(void)
|
||||
z = sizeof(struct cpu_local_var) * num_processors;
|
||||
z = (z + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
|
||||
clv = allocate_pages(z, IHK_MC_AP_CRITICAL);
|
||||
clv = ihk_mc_alloc_pages(z, IHK_MC_AP_CRITICAL);
|
||||
memset(clv, 0, z * PAGE_SIZE);
|
||||
cpu_local_var_initialized = 1;
|
||||
}
|
||||
|
||||
struct cpu_local_var *get_cpu_local_var(int id)
|
||||
{
|
||||
return clv + id;
|
||||
}
|
||||
|
||||
void preempt_enable(void)
|
||||
{
|
||||
if (cpu_local_var_initialized)
|
||||
--cpu_local_var(no_preempt);
|
||||
}
|
||||
|
||||
void preempt_disable(void)
|
||||
{
|
||||
if (cpu_local_var_initialized)
|
||||
++cpu_local_var(no_preempt);
|
||||
}
|
||||
|
||||
@ -26,10 +26,14 @@ SECTIONS
|
||||
|
||||
. = vsyscall_page + 0x000;
|
||||
*(.vsyscall.gettimeofday)
|
||||
*(.vsyscall.gettimeofday.*)
|
||||
|
||||
. = vsyscall_page + 0x400;
|
||||
*(.vsyscall.time)
|
||||
|
||||
. = vsyscall_page + 0x800;
|
||||
*(.vsyscall.getcpu)
|
||||
|
||||
. = ALIGN(4096);
|
||||
} : data = 0xf4
|
||||
|
||||
|
||||
@ -26,10 +26,14 @@ SECTIONS
|
||||
|
||||
. = vsyscall_page + 0x000;
|
||||
*(.vsyscall.gettimeofday)
|
||||
*(.vsyscall.gettimeofday.*)
|
||||
|
||||
. = vsyscall_page + 0x400;
|
||||
*(.vsyscall.time)
|
||||
|
||||
. = vsyscall_page + 0x800;
|
||||
*(.vsyscall.getcpu)
|
||||
|
||||
. = ALIGN(4096);
|
||||
} : data = 0xf4
|
||||
|
||||
|
||||
@ -26,10 +26,14 @@ SECTIONS
|
||||
|
||||
. = vsyscall_page + 0x000;
|
||||
*(.vsyscall.gettimeofday)
|
||||
*(.vsyscall.gettimeofday.*)
|
||||
|
||||
. = vsyscall_page + 0x400;
|
||||
*(.vsyscall.time)
|
||||
|
||||
. = vsyscall_page + 0x800;
|
||||
*(.vsyscall.getcpu)
|
||||
|
||||
. = ALIGN(4096);
|
||||
} : data = 0xf4
|
||||
|
||||
@ -39,8 +43,4 @@ SECTIONS
|
||||
. = ALIGN(4096);
|
||||
_end = .;
|
||||
|
||||
/DISCARD/ : {
|
||||
*(.eh_frame)
|
||||
*(.note.gnu.build-id)
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
CC = /usr/linux-k1om-4.7/bin/x86_64-k1om-linux-gcc
|
||||
LD = /usr/linux-k1om-4.7/bin/x86_64-k1om-linux-ld
|
||||
|
||||
CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
|
||||
LDFLAGS += -m elf_k1om -T $(SRC)/config/attached-mic.lds
|
||||
LDFLAGS_MKIMAGE = -m elf_k1om
|
||||
|
||||
@ -3,6 +3,5 @@ LD = /usr/linux-k1om-4.7/bin/x86_64-k1om-linux-ld
|
||||
OBJDUMP = /usr/linux-k1om-4.7/bin/x86_64-k1om-linux-objdump
|
||||
OBJCOPY = /usr/linux-k1om-4.7/bin/x86_64-k1om-linux-objcopy
|
||||
|
||||
CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
|
||||
LDFLAGS += -m elf_k1om -T $(SRC)/config/builtin-mic.lds
|
||||
LDFLAGS_MKIMAGE = -m elf_k1om
|
||||
|
||||
@ -1,2 +1 @@
|
||||
CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
|
||||
LDFLAGS += -T $(SRC)/config/builtin-x86.lds
|
||||
|
||||
1
kernel/config/config.smp-x86
Normal file
1
kernel/config/config.smp-x86
Normal file
@ -0,0 +1 @@
|
||||
LDFLAGS += -T $(SRC)/config/smp-x86.lds
|
||||
45
kernel/config/smp-x86.lds
Normal file
45
kernel/config/smp-x86.lds
Normal file
@ -0,0 +1,45 @@
|
||||
PHDRS
|
||||
{
|
||||
text PT_LOAD FLAGS(5);
|
||||
data PT_LOAD FLAGS(7);
|
||||
}
|
||||
SECTIONS
|
||||
{
|
||||
. = 0xffffffff80001000;
|
||||
_head = .;
|
||||
|
||||
.text : {
|
||||
*(.text);
|
||||
} : text
|
||||
|
||||
. = ALIGN(4096);
|
||||
.data : {
|
||||
*(.data)
|
||||
*(.data.*)
|
||||
} :data
|
||||
.rodata : {
|
||||
*(.rodata .rodata.*)
|
||||
} :data
|
||||
|
||||
.vsyscall : ALIGN(0x1000) {
|
||||
vsyscall_page = .;
|
||||
|
||||
. = vsyscall_page + 0x000;
|
||||
*(.vsyscall.gettimeofday)
|
||||
*(.vsyscall.gettimeofday.*)
|
||||
|
||||
. = vsyscall_page + 0x400;
|
||||
*(.vsyscall.time)
|
||||
|
||||
. = vsyscall_page + 0x800;
|
||||
*(.vsyscall.getcpu)
|
||||
|
||||
. = ALIGN(4096);
|
||||
} : data = 0xf4
|
||||
|
||||
.bss : {
|
||||
*(.bss .bss.*)
|
||||
}
|
||||
. = ALIGN(4096);
|
||||
_end = .;
|
||||
}
|
||||
153
kernel/debug.c
153
kernel/debug.c
@ -22,13 +22,46 @@ extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
|
||||
extern int sprintf(char * buf, const char *fmt, ...);
|
||||
static ihk_spinlock_t kmsg_lock;
|
||||
|
||||
static unsigned long kprintf_lock_head(void);
|
||||
static void kprintf_unlock_head(unsigned long irqflags);
|
||||
|
||||
static void kprintf_wait(int len, unsigned long *flags_head, int *slide) {
|
||||
int head, tail, buf_len, mode, adj;
|
||||
|
||||
mode = kmsg_buf.mode;
|
||||
while (1) {
|
||||
adj = 0;
|
||||
tail = kmsg_buf.tail;
|
||||
buf_len = kmsg_buf.len;
|
||||
head = kmsg_buf.head;
|
||||
if (head < tail) head += buf_len;
|
||||
if (tail + len > buf_len) adj = buf_len - tail;
|
||||
if (head > tail && head <= tail + len + adj) {
|
||||
/* When proceeding tail (producer pointer) by len would
|
||||
cross head (consumer pointer) in ring-buffer */
|
||||
if (mode != 1) {
|
||||
*slide = 1;
|
||||
break;
|
||||
} else {
|
||||
kprintf_unlock_head(*flags_head);
|
||||
*flags_head = kprintf_lock_head();
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO: lock */
|
||||
void kputs(char *buf)
|
||||
{
|
||||
int len = strlen(buf);
|
||||
unsigned long flags;
|
||||
int slide = 0;
|
||||
unsigned long flags_tail, flags_head;
|
||||
|
||||
flags = ihk_mc_spinlock_lock(&kmsg_lock);
|
||||
flags_tail = kprintf_lock();
|
||||
flags_head = kprintf_lock_head();
|
||||
kprintf_wait(len, &flags_head, &slide);
|
||||
|
||||
if (len + kmsg_buf.tail > kmsg_buf.len) {
|
||||
kmsg_buf.tail = 0;
|
||||
@ -39,60 +72,57 @@ void kputs(char *buf)
|
||||
|
||||
memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len);
|
||||
kmsg_buf.tail += len;
|
||||
|
||||
ihk_mc_spinlock_unlock(&kmsg_lock, flags);
|
||||
/* When proceeding tail (producer pointer) by len would
|
||||
cross head (consumer pointer) in ring-buffer, give up
|
||||
[head, tail] because the range is overwritten */
|
||||
if (slide == 1) {
|
||||
kmsg_buf.head = kmsg_buf.tail + 1;
|
||||
if (kmsg_buf.head >= kmsg_buf.len) kmsg_buf.head = 0;
|
||||
}
|
||||
kprintf_unlock_head(flags_head);
|
||||
kprintf_unlock(flags_tail);
|
||||
}
|
||||
|
||||
#define KPRINTF_LOCAL_BUF_LEN 1024
|
||||
|
||||
int kprintf_lock()
|
||||
unsigned long kprintf_lock(void)
|
||||
{
|
||||
return ihk_mc_spinlock_lock(&kmsg_lock);
|
||||
return __ihk_mc_spinlock_lock(&kmsg_lock);
|
||||
}
|
||||
|
||||
void kprintf_unlock(int irqflags)
|
||||
void kprintf_unlock(unsigned long irqflags)
|
||||
{
|
||||
ihk_mc_spinlock_unlock(&kmsg_lock, irqflags);
|
||||
__ihk_mc_spinlock_unlock(&kmsg_lock, irqflags);
|
||||
}
|
||||
|
||||
static unsigned long kprintf_lock_head(void)
|
||||
{
|
||||
return __ihk_mc_spinlock_lock(&kmsg_buf.lock);
|
||||
}
|
||||
|
||||
static void kprintf_unlock_head(unsigned long irqflags)
|
||||
{
|
||||
__ihk_mc_spinlock_unlock(&kmsg_buf.lock, irqflags);
|
||||
}
|
||||
|
||||
/* Caller must hold kmsg_lock! */
|
||||
int __kprintf(const char *format, ...)
|
||||
{
|
||||
int len = 0;
|
||||
int slide = 0;
|
||||
va_list va;
|
||||
unsigned long flags_head;
|
||||
char buf[KPRINTF_LOCAL_BUF_LEN];
|
||||
|
||||
/* Copy into the local buf */
|
||||
va_start(va, format);
|
||||
len += vsnprintf(buf + len, KPRINTF_LOCAL_BUF_LEN - len - 2, format, va);
|
||||
va_end(va);
|
||||
|
||||
/* Append to kmsg buffer */
|
||||
if (kmsg_buf.tail + len > kmsg_buf.len) {
|
||||
kmsg_buf.tail = 0;
|
||||
}
|
||||
|
||||
memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len);
|
||||
kmsg_buf.tail += len;
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
int kprintf(const char *format, ...)
|
||||
{
|
||||
int len = 0;
|
||||
va_list va;
|
||||
unsigned long flags;
|
||||
char buf[KPRINTF_LOCAL_BUF_LEN];
|
||||
|
||||
flags = ihk_mc_spinlock_lock(&kmsg_lock);
|
||||
|
||||
/* Copy into the local buf */
|
||||
len = sprintf(buf, "[%3d]: ", ihk_mc_get_processor_id());
|
||||
va_start(va, format);
|
||||
len += vsnprintf(buf + len, KPRINTF_LOCAL_BUF_LEN - len - 2, format, va);
|
||||
va_end(va);
|
||||
|
||||
flags_head = kprintf_lock_head();
|
||||
kprintf_wait(len, &flags_head, &slide);
|
||||
|
||||
/* Append to kmsg buffer */
|
||||
if (kmsg_buf.tail + len > kmsg_buf.len) {
|
||||
kmsg_buf.tail = 0;
|
||||
@ -100,16 +130,69 @@ int kprintf(const char *format, ...)
|
||||
|
||||
memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len);
|
||||
kmsg_buf.tail += len;
|
||||
if (slide == 1) {
|
||||
kmsg_buf.head = kmsg_buf.tail + 1;
|
||||
if (kmsg_buf.head >= kmsg_buf.len) kmsg_buf.head = 0;
|
||||
}
|
||||
|
||||
ihk_mc_spinlock_unlock(&kmsg_lock, flags);
|
||||
kprintf_unlock_head(flags_head);
|
||||
return len;
|
||||
}
|
||||
|
||||
int kprintf(const char *format, ...)
|
||||
{
|
||||
int len = 0;
|
||||
int slide = 0;
|
||||
va_list va;
|
||||
unsigned long flags_tail, flags_head;
|
||||
char buf[KPRINTF_LOCAL_BUF_LEN];
|
||||
|
||||
/* Copy into the local buf */
|
||||
len = sprintf(buf, "[%3d]: ", ihk_mc_get_processor_id());
|
||||
va_start(va, format);
|
||||
len += vsnprintf(buf + len, KPRINTF_LOCAL_BUF_LEN - len - 2, format, va);
|
||||
va_end(va);
|
||||
|
||||
flags_tail = kprintf_lock();
|
||||
flags_head = kprintf_lock_head();
|
||||
kprintf_wait(len, &flags_head, &slide);
|
||||
|
||||
/* Append to kmsg buffer */
|
||||
if (kmsg_buf.tail + len > kmsg_buf.len) {
|
||||
kmsg_buf.tail = 0;
|
||||
}
|
||||
|
||||
memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len);
|
||||
kmsg_buf.tail += len;
|
||||
if (slide == 1) {
|
||||
kmsg_buf.head = kmsg_buf.tail + 1;
|
||||
if (kmsg_buf.head >= kmsg_buf.len) kmsg_buf.head = 0;
|
||||
}
|
||||
|
||||
kprintf_unlock_head(flags_head);
|
||||
kprintf_unlock(flags_tail);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
void kmsg_init(void)
|
||||
/* mode:
|
||||
0: mcklogd is not running.
|
||||
When kmsg buffer is full, writer doesn't block
|
||||
and overwrites the buffer.
|
||||
1: mcklogd periodically retrieves kmsg.
|
||||
When kmsg buffer is full, writer blocks until
|
||||
someone retrieves kmsg.
|
||||
2: mcklogd periodically retrieves kmsg.
|
||||
When kmsg buffer is full, writer doesn't block
|
||||
and overwrites the buffer.
|
||||
*/
|
||||
void kmsg_init(int mode)
|
||||
{
|
||||
ihk_mc_spinlock_init(&kmsg_lock);
|
||||
kmsg_buf.tail = 0;
|
||||
kmsg_buf.len = sizeof(kmsg_buf.str);
|
||||
kmsg_buf.head = 0;
|
||||
kmsg_buf.mode = mode;
|
||||
ihk_mc_spinlock_init(&kmsg_buf.lock);
|
||||
memset(kmsg_buf.str, 0, kmsg_buf.len);
|
||||
}
|
||||
|
||||
@ -3,7 +3,8 @@
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* memory mapped device pager client
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2014 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
@ -32,9 +33,18 @@
|
||||
#include <pager.h>
|
||||
#include <string.h>
|
||||
#include <syscall.h>
|
||||
#include <process.h>
|
||||
|
||||
//#define DEBUG_PRINT_DEVOBJ
|
||||
|
||||
#ifdef DEBUG_PRINT_DEVOBJ
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#else
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#define dkprintf(...)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
|
||||
struct devobj {
|
||||
struct memobj memobj; /* must be first */
|
||||
@ -68,51 +78,52 @@ static struct memobj *to_memobj(struct devobj *devobj)
|
||||
/***********************************************************************
|
||||
* devobj
|
||||
*/
|
||||
int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp)
|
||||
int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp,
|
||||
int prot, int populate_flags)
|
||||
{
|
||||
ihk_mc_user_context_t ctx;
|
||||
struct pager_map_result result; // XXX: assumes contiguous physical
|
||||
int error;
|
||||
struct devobj *obj = NULL;
|
||||
const size_t npages = (len + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||
const size_t pfn_npages = (npages / (PAGE_SIZE / sizeof(uintptr_t))) + 1;
|
||||
|
||||
kprintf("devobj_create(%d,%lx,%lx)\n", fd, len, off);
|
||||
#define MAX_PAGES_IN_DEVOBJ (PAGE_SIZE / sizeof(uintptr_t))
|
||||
if (npages > MAX_PAGES_IN_DEVOBJ) {
|
||||
error = -EFBIG;
|
||||
kprintf("devobj_create(%d,%lx,%lx):too large len. %d\n", fd, len, off, error);
|
||||
goto out;
|
||||
}
|
||||
dkprintf("%s: fd: %d, len: %lu, off: %lu \n", __FUNCTION__, fd, len, off);
|
||||
|
||||
obj = kmalloc(sizeof(*obj), IHK_MC_AP_NOWAIT);
|
||||
if (!obj) {
|
||||
error = -ENOMEM;
|
||||
kprintf("devobj_create(%d,%lx,%lx):kmalloc failed. %d\n", fd, len, off, error);
|
||||
kprintf("%s: error: fd: %d, len: %lu, off: %lu kmalloc failed.\n",
|
||||
__FUNCTION__, fd, len, off);
|
||||
goto out;
|
||||
}
|
||||
memset(obj, 0, sizeof(*obj));
|
||||
|
||||
obj->pfn_table = allocate_pages(1, IHK_MC_AP_NOWAIT);
|
||||
obj->pfn_table = ihk_mc_alloc_pages(pfn_npages, IHK_MC_AP_NOWAIT);
|
||||
if (!obj->pfn_table) {
|
||||
error = -ENOMEM;
|
||||
kprintf("devobj_create(%d,%lx,%lx):allocate_pages failed. %d\n", fd, len, off, error);
|
||||
kprintf("%s: error: fd: %d, len: %lu, off: %lu allocating PFN failed.\n",
|
||||
__FUNCTION__, fd, len, off);
|
||||
goto out;
|
||||
}
|
||||
memset(obj->pfn_table, 0, 1*PAGE_SIZE);
|
||||
memset(obj->pfn_table, 0, pfn_npages * PAGE_SIZE);
|
||||
|
||||
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_MAP;
|
||||
ihk_mc_syscall_arg1(&ctx) = fd;
|
||||
ihk_mc_syscall_arg2(&ctx) = len;
|
||||
ihk_mc_syscall_arg3(&ctx) = off;
|
||||
ihk_mc_syscall_arg4(&ctx) = virt_to_phys(&result);
|
||||
ihk_mc_syscall_arg5(&ctx) = prot | populate_flags;
|
||||
|
||||
error = syscall_generic_forwarding(__NR_mmap, &ctx);
|
||||
if (error) {
|
||||
kprintf("devobj_create(%d,%lx,%lx):map failed. %d\n", fd, len, off, error);
|
||||
kprintf("%s: error: fd: %d, len: %lu, off: %lu map failed.\n",
|
||||
__FUNCTION__, fd, len, off);
|
||||
goto out;
|
||||
}
|
||||
kprintf("devobj_create:handle: %lx\n", result.handle);
|
||||
kprintf("devobj_create:maxprot: %x\n", result.maxprot);
|
||||
|
||||
dkprintf("%s: fd: %d, len: %lu, off: %lu, handle: %p, maxprot: %x\n",
|
||||
__FUNCTION__, fd, len, off, result.handle, result.maxprot);
|
||||
|
||||
obj->memobj.ops = &devobj_ops;
|
||||
obj->memobj.flags = MF_HAS_PAGER;
|
||||
@ -130,11 +141,12 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
|
||||
out:
|
||||
if (obj) {
|
||||
if (obj->pfn_table) {
|
||||
free_pages(obj->pfn_table, 1);
|
||||
ihk_mc_free_pages(obj->pfn_table, pfn_npages);
|
||||
}
|
||||
kfree(obj);
|
||||
}
|
||||
kprintf("devobj_create(%d,%lx,%lx): %d %p %x%d\n", fd, len, off, error, *objp, *maxprotp);
|
||||
dkprintf("%s: ret: %d, fd: %d, len: %lu, off: %lu, handle: %p, maxprot: %x \n",
|
||||
__FUNCTION__, error, fd, len, off, result.handle, result.maxprot);
|
||||
return error;
|
||||
}
|
||||
|
||||
@ -142,7 +154,7 @@ static void devobj_ref(struct memobj *memobj)
|
||||
{
|
||||
struct devobj *obj = to_devobj(memobj);
|
||||
|
||||
kprintf("devobj_ref(%p %lx):\n", obj, obj->handle);
|
||||
dkprintf("devobj_ref(%p %lx):\n", obj, obj->handle);
|
||||
memobj_lock(&obj->memobj);
|
||||
++obj->ref;
|
||||
memobj_unlock(&obj->memobj);
|
||||
@ -154,8 +166,10 @@ static void devobj_release(struct memobj *memobj)
|
||||
struct devobj *obj = to_devobj(memobj);
|
||||
struct devobj *free_obj = NULL;
|
||||
uintptr_t handle;
|
||||
const size_t pfn_npages =
|
||||
(obj->npages / (PAGE_SIZE / sizeof(uintptr_t))) + 1;
|
||||
|
||||
kprintf("devobj_release(%p %lx)\n", obj, obj->handle);
|
||||
dkprintf("devobj_release(%p %lx)\n", obj, obj->handle);
|
||||
|
||||
memobj_lock(&obj->memobj);
|
||||
--obj->ref;
|
||||
@ -182,19 +196,19 @@ static void devobj_release(struct memobj *memobj)
|
||||
}
|
||||
|
||||
if (obj->pfn_table) {
|
||||
free_pages(obj->pfn_table, 1);
|
||||
ihk_mc_free_pages(obj->pfn_table, pfn_npages);
|
||||
}
|
||||
kfree(free_obj);
|
||||
}
|
||||
|
||||
kprintf("devobj_release(%p %lx):free %p\n",
|
||||
dkprintf("devobj_release(%p %lx):free %p\n",
|
||||
obj, handle, free_obj);
|
||||
return;
|
||||
}
|
||||
|
||||
static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp)
|
||||
static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag)
|
||||
{
|
||||
const off_t pgoff = off >> PAGE_SHIFT;
|
||||
const off_t pgoff = off / PAGE_SIZE;
|
||||
struct devobj *obj = to_devobj(memobj);
|
||||
int error;
|
||||
uintptr_t pfn;
|
||||
@ -202,15 +216,15 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
|
||||
ihk_mc_user_context_t ctx;
|
||||
int ix;
|
||||
|
||||
kprintf("devobj_get_page(%p %lx,%lx,%d)\n", memobj, obj->handle, off, p2align);
|
||||
dkprintf("devobj_get_page(%p %lx,%lx,%d)\n", memobj, obj->handle, off, p2align);
|
||||
|
||||
if ((pgoff < obj->pfn_pgoff) || ((obj->pfn_pgoff + obj->npages) <= pgoff)) {
|
||||
error = -EFBIG;
|
||||
kprintf("devobj_get_page(%p %lx,%lx,%d): out of range. %d\n", memobj, obj->handle, off, p2align, error);
|
||||
kprintf("%s: error: out of range: off: %lu, page off: %lu obj->npages: %d\n", __FUNCTION__, off, pgoff, obj->npages);
|
||||
goto out;
|
||||
}
|
||||
ix = pgoff - obj->pfn_pgoff;
|
||||
kprintf("ix: %ld\n", ix);
|
||||
dkprintf("ix: %ld\n", ix);
|
||||
|
||||
memobj_lock(&obj->memobj);
|
||||
pfn = obj->pfn_table[ix];
|
||||
@ -230,12 +244,20 @@ kprintf("ix: %ld\n", ix);
|
||||
|
||||
if (pfn & PFN_PRESENT) {
|
||||
/* convert remote physical into local physical */
|
||||
kprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT before %#lx\n", memobj, obj->handle, off, p2align, pfn);
|
||||
dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT before %#lx\n", memobj, obj->handle, off, p2align, pfn);
|
||||
attr = pfn & ~PFN_PFN;
|
||||
|
||||
/* TODO: do an arch dependent PTE to mapping flag conversion
|
||||
* instead of this inline check, also, we rely on having the
|
||||
* same PAT config as Linux here.. */
|
||||
if ((pfn & PFL1_PWT) && !(pfn & PFL1_PCD)) {
|
||||
*flag |= VR_WRITE_COMBINED;
|
||||
}
|
||||
|
||||
pfn = ihk_mc_map_memory(NULL, (pfn & PFN_PFN), PAGE_SIZE);
|
||||
pfn &= PFN_PFN;
|
||||
pfn |= attr;
|
||||
kprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn);
|
||||
dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn);
|
||||
}
|
||||
|
||||
memobj_lock(&obj->memobj);
|
||||
@ -253,6 +275,6 @@ kprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->
|
||||
*physp = pfn & PFN_PFN;
|
||||
|
||||
out:
|
||||
kprintf("devobj_get_page(%p %lx,%lx,%d): %d %lx\n", memobj, obj->handle, off, p2align, error, *physp);
|
||||
dkprintf("devobj_get_page(%p %lx,%lx,%d): %d %lx\n", memobj, obj->handle, off, p2align, error, *physp);
|
||||
return error;
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user