The following commit has been merged in the master branch:

commit f3105fd38ff332967eaf92abe7edcdd0cdd9ff42
Merge: a7a85ec518094c4504c5a1c99a1ee37cb53c3359 0ffdd21bf6505df7f27c2a44a7ba78269d133d01
Author: Stephen Rothwell <sfr@canb.auug.org.au>
Date:   Wed Jul 9 17:37:22 2014 +1000

    Merge branch 'akpm-current/current'

    Conflicts:
    	mm/shmem.c
diff --combined Documentation/kernel-parameters.txt
index a9ccb53,80746b2..ad7e59a
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@@ -566,11 -566,6 +566,11 @@@ bytes respectively. Such letter suffixe
 			possible to determine what the correct size should be.
 			This option provides an override for these situations.
+	ca_keys=	[KEYS] This parameter identifies a specific key(s) on
+			the system trusted keyring to be used for certificate
+			trust validation.
+			format: { id:<keyid> | builtin }
+
 	ccw_timeout_log	[S390] See Documentation/s390/CommonIO for details.
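For illustration, the two accepted forms look like this on a kernel command
line (the hexadecimal key id below is hypothetical; "builtin" is a literal
keyword per the format above):

	ca_keys=builtin
	ca_keys=id:5f1a2b3c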
@@@ -1421,6 -1416,10 +1421,6 @@@ ip= [IP_PNP] See Documentation/filesystems/nfs/nfsroot.txt.
- ip2= [HW] Set IO/IRQ pairs for up to 4 IntelliPort boards - See comment before ip2_setup() in - drivers/char/ip2/ip2base.c. - irqfixup [HW] When an interrupt is not handled search all handlers for it. Intended to get systems with badly broken @@@ -1693,8 -1692,12 +1693,12 @@@ 7 (KERN_DEBUG) debug-level messages
 	log_buf_len=n[KMG]	Sets the size of the printk ring buffer,
-			in bytes.  n must be a power of two.  The default
-			size is set in the kernel config file.
+			in bytes.  n must be a power of two and greater
+			than the minimal size.  The minimal size is defined
+			by the LOG_BUF_SHIFT kernel config parameter.  There
+			is also the CONFIG_LOG_CPU_MAX_BUF_SHIFT config
+			parameter that allows increasing the default size
+			depending on the number of CPUs.  See init/Kconfig
+			for more details.
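Roughly, the resulting sizing behaves like this simplified C sketch
(illustrative only — the function name is hypothetical and the real code in
kernel/printk/printk.c applies additional clamping):

	#define LOG_BUF_SHIFT		17	/* example minimal size: 128 KiB */
	#define LOG_CPU_MAX_BUF_SHIFT	12	/* example per-CPU bump: 4 KiB */

	static unsigned int log_buf_size(unsigned int ncpus,
					 unsigned int cmdline_len)
	{
		unsigned int len = 1U << LOG_BUF_SHIFT;	/* minimal size */

		/* log_buf_len= wins if it is a power of two >= the minimum */
		if (cmdline_len >= len && !(cmdline_len & (cmdline_len - 1)))
			return cmdline_len;

		/* otherwise scale the default with the number of CPUs */
		len += (ncpus - 1) * (1U << LOG_CPU_MAX_BUF_SHIFT);
		return len;
	}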
logo.nologo [FB] Disables display of the built-in Linux logo. This may be used to provide more screen space for @@@ -2167,21 -2170,6 +2171,21 @@@ and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state.
+ noxsaveopt [X86] Disables xsaveopt used in saving x86 extended + register states. The kernel will fall back to use + xsave to save the states. By using this parameter, + performance of saving the states is degraded because + xsave doesn't support modified optimization while + xsaveopt supports it on xsaveopt enabled systems. + + noxsaves [X86] Disables xsaves and xrstors used in saving and + restoring x86 extended register state in compacted + form of xsave area. The kernel will fall back to use + xsaveopt and xrstor to save and restore the states + in standard form of xsave area. By using this + parameter, xsave area per process might occupy more + memory on xsaves enabled systems. + eagerfpu= [X86] on enable eager fpu restore off disable eager fpu restore @@@ -2806,12 -2794,6 +2810,12 @@@ leaf rcu_node structure. Useful for very large systems.
+ rcutree.jiffies_till_sched_qs= [KNL] + Set required age in jiffies for a + given grace period before RCU starts + soliciting quiescent-state help from + rcu_note_context_switch(). + rcutree.jiffies_till_first_fqs= [KNL] Set delay from grace-period initialization to first attempt to force quiescent states. @@@ -2823,13 -2805,6 +2827,13 @@@ quiescent states. Units are jiffies, minimum value is one, and maximum value is HZ.
+	rcutree.rcu_nocb_leader_stride= [KNL]
+			Set the number of NOCB kthread groups, which
+			defaults to the square root of the number of
+			CPUs.  Larger numbers reduce the wakeup overhead
+			on the per-CPU grace-period kthreads, but increase
+			that same overhead on each group's leader.
+
 	rcutree.qhimark= [KNL]
 			Set threshold of queued RCU callbacks beyond which
 			batch limiting is disabled.

diff --combined MAINTAINERS
index 2a7aaa9,dc5ed06..eb8d728
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@@ -70,8 -70,6 +70,8 @@@ Descriptions of section entries
P: Person (obsolete) M: Mail patches to: FullName address@domain + R: Designated reviewer: FullName address@domain + These reviewers should be CCed on patches. L: Mailing list that is relevant to this area W: Web-page with status/info Q: Patchwork web based patch tracking system site @@@ -589,7 -587,7 +589,7 @@@ W: http://www.amd.com/us-en/Connectivit S: Supported F: drivers/char/hw_random/geode-rng.c F: drivers/crypto/geode* -F: drivers/video/geode/ +F: drivers/video/fbdev/geode/ F: arch/x86/include/asm/geode.h
AMD IOMMU (AMD-VI) @@@ -718,8 -716,8 +718,8 @@@ F: drivers/ata/pata_arasan_cf. ARC FRAMEBUFFER DRIVER M: Jaya Kumar jayalk@intworks.biz S: Maintained -F: drivers/video/arcfb.c -F: drivers/video/fb_defio.c +F: drivers/video/fbdev/arcfb.c +F: drivers/video/fbdev/core/fb_defio.c
ARM MFM AND FLOPPY DRIVERS M: Ian Molton spyro@f2s.com @@@ -758,7 -756,7 +758,7 @@@ F: sound/arm/aaci. ARM PRIMECELL CLCD PL110 DRIVER M: Russell King linux@arm.linux.org.uk S: Maintained -F: drivers/video/amba-clcd.* +F: drivers/video/fbdev/amba-clcd.*
ARM PRIMECELL KMI PL050 DRIVER M: Russell King linux@arm.linux.org.uk @@@ -945,10 -943,16 +945,10 @@@ L: linux-arm-kernel@lists.infradead.or S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git F: arch/arm/mach-imx/ +F: arch/arm/mach-mxs/ F: arch/arm/boot/dts/imx* F: arch/arm/configs/imx*_defconfig
-ARM/FREESCALE MXS ARM ARCHITECTURE -M: Shawn Guo shawn.guo@linaro.org -L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Maintained -T: git git://git.linaro.org/people/shawnguo/linux-2.6.git -F: arch/arm/mach-mxs/ - ARM/GLOMATION GESBC9312SX MACHINE SUPPORT M: Lennert Buytenhek kernel@wantstofly.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@@ -1048,33 -1052,9 +1048,33 @@@ M: Santosh Shilimkar <santosh.shilimkar L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-keystone/ -F: drivers/clk/keystone/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git
+ARM/TEXAS INSTRUMENT KEYSTONE CLOCK FRAMEWORK
+M:	Santosh Shilimkar <santosh.shilimkar@ti.com>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	drivers/clk/keystone/
+
+ARM/TEXAS INSTRUMENT KEYSTONE CLOCKSOURCE
+M:	Santosh Shilimkar <santosh.shilimkar@ti.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	drivers/clocksource/timer-keystone.c
+
+ARM/TEXAS INSTRUMENT KEYSTONE RESET DRIVER
+M:	Santosh Shilimkar <santosh.shilimkar@ti.com>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	drivers/power/reset/keystone-reset.c
+
+ARM/TEXAS INSTRUMENT AEMIF/EMIF DRIVERS
+M:	Santosh Shilimkar <santosh.shilimkar@ti.com>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	drivers/memory/*emif*
+
 ARM/LOGICPD PXA270 MACHINE SUPPORT
 M:	Lennert Buytenhek <kernel@wantstofly.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@@ -1155,7 -1135,7 +1155,7 @@@ M: Daniel Walker <dwalker@fifo99.com
 M:	Bryan Huntsman <bryanh@codeaurora.org>
 L:	linux-arm-msm@vger.kernel.org
 F:	arch/arm/mach-msm/
-F:	drivers/video/msm/
+F:	drivers/video/fbdev/msm/
 F:	drivers/mmc/host/msm_sdcc.c
 F:	drivers/mmc/host/msm_sdcc.h
 F:	drivers/tty/serial/msm_serial.h
@@@ -1316,20 -1296,6 +1316,20 @@@ W: http://oss.renesas.co
 Q:	http://patchwork.kernel.org/project/linux-sh/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next
 S:	Supported
+F:	arch/arm/boot/dts/emev2*
+F:	arch/arm/boot/dts/r7s*
+F:	arch/arm/boot/dts/r8a*
+F:	arch/arm/boot/dts/sh*
+F:	arch/arm/configs/ape6evm_defconfig
+F:	arch/arm/configs/armadillo800eva_defconfig
+F:	arch/arm/configs/bockw_defconfig
+F:	arch/arm/configs/genmai_defconfig
+F:	arch/arm/configs/koelsch_defconfig
+F:	arch/arm/configs/kzm9g_defconfig
+F:	arch/arm/configs/lager_defconfig
+F:	arch/arm/configs/mackerel_defconfig
+F:	arch/arm/configs/marzen_defconfig
+F:	arch/arm/configs/shmobile_defconfig
 F:	arch/arm/mach-shmobile/
 F:	drivers/sh/
@@@ -1389,7 -1355,7 +1389,7 @@@ F: drivers/mtd/nand/nuc900_nand. F: drivers/rtc/rtc-nuc900.c F: drivers/spi/spi-nuc900.c F: drivers/usb/host/ehci-w90x900.c -F: drivers/video/nuc900fb.c +F: drivers/video/fbdev/nuc900fb.c
ARM/U300 MACHINE SUPPORT M: Linus Walleij linus.walleij@linaro.org @@@ -1458,9 -1424,9 +1458,9 @@@ F: drivers/rtc/rtc-vt8500. F: drivers/tty/serial/vt8500_serial.c F: drivers/usb/host/ehci-platform.c F: drivers/usb/host/uhci-platform.c -F: drivers/video/vt8500lcdfb.* -F: drivers/video/wm8505fb* -F: drivers/video/wmt_ge_rops.* +F: drivers/video/fbdev/vt8500lcdfb.* +F: drivers/video/fbdev/wm8505fb* +F: drivers/video/fbdev/wmt_ge_rops.*
ARM/ZIPIT Z2 SUPPORT M: Marek Vasut marek.vasut@gmail.com @@@ -1650,7 -1616,7 +1650,7 @@@ ATMEL LCDFB DRIVE M: Nicolas Ferre nicolas.ferre@atmel.com L: linux-fbdev@vger.kernel.org S: Maintained -F: drivers/video/atmel_lcdfb.c +F: drivers/video/fbdev/atmel_lcdfb.c F: include/video/atmel_lcdc.h
ATMEL MACB ETHERNET DRIVER
@@@ -1797,13 -1763,6 +1797,13 @@@ W: http://bcache.evilpiepirate.or
 S:	Maintained
 F:	drivers/md/bcache/
+BECEEM BCS200/BCS220-3/BCSM250 WIMAX SUPPORT +M: Kevin McKinney klmckinney1@gmail.com +M: Matthias Beyer mail@beyermatthias.de +L: devel@driverdev.osuosl.org +S: Maintained +F: drivers/staging/bcm* + BEFS FILE SYSTEM S: Orphan F: Documentation/filesystems/befs.txt @@@ -1935,8 -1894,7 +1935,8 @@@ S: Supporte F: drivers/net/ethernet/broadcom/genet/
BROADCOM BNX2 GIGABIT ETHERNET DRIVER -M: Michael Chan mchan@broadcom.com +M: Sony Chacko sony.chacko@qlogic.com +M: Dept-HSGLinuxNICDev@qlogic.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/broadcom/bnx2.* @@@ -1981,7 -1939,7 +1981,7 @@@ F: arch/arm/boot/dts/bcm5301x.dts F: arch/arm/boot/dts/bcm470*
BROADCOM TG3 GIGABIT ETHERNET DRIVER -M: Nithin Nayak Sujir nsujir@broadcom.com +M: Prashant Sreedharan prashant@broadcom.com M: Michael Chan mchan@broadcom.com L: netdev@vger.kernel.org S: Supported @@@ -2668,7 -2626,7 +2668,7 @@@ M: Russell King <linux@arm.linux.org.uk L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) W: http://www.arm.linux.org.uk/ S: Maintained -F: drivers/video/cyber2000fb.* +F: drivers/video/fbdev/cyber2000fb.*
CYCLADES ASYNC MUX DRIVER W: http://www.cyclades.com/ @@@ -2905,7 -2863,7 +2905,7 @@@ M: Bernie Thompson <bernie@plugable.com L: linux-fbdev@vger.kernel.org S: Maintained W: http://plugable.com/category/projects/udlfb/ -F: drivers/video/udlfb.c +F: drivers/video/fbdev/udlfb.c F: include/video/udlfb.h F: Documentation/fb/udlfb.txt
@@@ -2924,8 -2882,8 +2924,8 @@@ S: Maintaine L: linux-media@vger.kernel.org L: dri-devel@lists.freedesktop.org L: linaro-mm-sig@lists.linaro.org -F: drivers/base/dma-buf* -F: include/linux/dma-buf* +F: drivers/dma-buf/ +F: include/linux/dma-buf* include/linux/reservation.h include/linux/*fence.h F: Documentation/dma-buf-sharing.txt T: git git://git.linaro.org/people/sumitsemwal/linux-dma-buf.git
@@@ -3214,26 -3172,12 +3214,12 @@@ T: git git://linuxtv.org/anttip/media_t S: Maintained F: drivers/media/tuners/e4000*
- EATA-DMA SCSI DRIVER - M: Michael Neuffer mike@i-Connect.Net - L: linux-eata@i-connect.net - L: linux-scsi@vger.kernel.org - S: Maintained - F: drivers/scsi/eata* - EATA ISA/EISA/PCI SCSI DRIVER M: Dario Ballabio ballabio_dario@emc.com L: linux-scsi@vger.kernel.org S: Maintained F: drivers/scsi/eata.c
- EATA-PIO SCSI DRIVER - M: Michael Neuffer mike@i-Connect.Net - L: linux-eata@i-connect.net - L: linux-scsi@vger.kernel.org - S: Maintained - F: drivers/scsi/eata_pio.* - EC100 MEDIA DRIVER M: Antti Palosaari crope@iki.fi L: linux-media@vger.kernel.org @@@ -3422,7 -3366,7 +3408,7 @@@ EFIFB FRAMEBUFFER DRIVE L: linux-fbdev@vger.kernel.org M: Peter Jones pjones@redhat.com S: Maintained -F: drivers/video/efifb.c +F: drivers/video/fbdev/efifb.c
EFS FILESYSTEM W: http://aeschi.ch.eu.org/efs/ @@@ -3487,7 -3431,7 +3473,7 @@@ EPSON S1D13XXX FRAMEBUFFER DRIVE M: Kristoffer Ericson kristoffer.ericson@gmail.com S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/kristoffer/linux-hpc.git -F: drivers/video/s1d13xxxfb.c +F: drivers/video/fbdev/s1d13xxxfb.c F: include/video/s1d13xxxfb.h
ETHERNET BRIDGE @@@ -3565,7 -3509,7 +3551,7 @@@ M: Donghwa Lee <dh09.lee@samsung.com M: Kyungmin Park kyungmin.park@samsung.com L: linux-fbdev@vger.kernel.org S: Maintained -F: drivers/video/exynos/exynos_mipi* +F: drivers/video/fbdev/exynos/exynos_mipi* F: include/video/exynos_mipi*
F71805F HARDWARE MONITORING DRIVER @@@ -3744,7 -3688,7 +3730,7 @@@ FREESCALE DIU FRAMEBUFFER DRIVE M: Timur Tabi timur@tabi.org L: linux-fbdev@vger.kernel.org S: Maintained -F: drivers/video/fsl-diu-fb.* +F: drivers/video/fbdev/fsl-diu-fb.*
FREESCALE DMA DRIVER M: Li Yang leoli@freescale.com @@@ -3766,7 -3710,7 +3752,7 @@@ L: linux-fbdev@vger.kernel.or L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: include/linux/platform_data/video-imxfb.h -F: drivers/video/imxfb.c +F: drivers/video/fbdev/imxfb.c
FREESCALE SOC FS_ENET DRIVER M: Pantelis Antoniou pantelis.antoniou@gmail.com @@@ -4185,7 -4129,7 +4171,7 @@@ M: Ferenc Bakonyi <fero@drama.obuda.kan L: linux-nvidia@lists.surfsouth.com W: http://drama.obuda.kando.hu/~fero/cgi-bin/hgafb.shtml S: Maintained -F: drivers/video/hgafb.c +F: drivers/video/fbdev/hgafb.c
HIBERNATION (aka Software Suspend, aka swsusp) M: "Rafael J. Wysocki" rjw@rjwysocki.net @@@ -4215,7 -4159,7 +4201,7 @@@ L: linux-kernel@vger.kernel.or T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core S: Maintained F: Documentation/timers/ -F: kernel/hrtimer.c +F: kernel/time/hrtimer.c F: kernel/time/clockevents.c F: kernel/time/tick*.* F: kernel/time/timer_*.c @@@ -4327,7 -4271,7 +4313,7 @@@ F: drivers/hv F: drivers/input/serio/hyperv-keyboard.c F: drivers/net/hyperv/ F: drivers/scsi/storvsc_drv.c -F: drivers/video/hyperv_fb.c +F: drivers/video/fbdev/hyperv_fb.c F: include/linux/hyperv.h F: tools/hv/
@@@ -4471,10 -4415,7 +4457,7 @@@ S: Supporte F: drivers/scsi/ibmvscsi/ibmvfc*
IBM ServeRAID RAID DRIVER - P: Jack Hammer - M: Dave Jeffery ipslinux@adaptec.com - W: http://www.developer.ibm.com/welcome/netfinity/serveraid.html - S: Supported + S: Orphan F: drivers/scsi/ips.*
ICH LPC AND GPIO DRIVER @@@ -4521,7 -4462,8 +4504,7 @@@ S: Supporte F: drivers/idle/i7300_idle.c
IEEE 802.15.4 SUBSYSTEM -M: Alexander Smirnov alex.bluesman.smirnov@gmail.com -M: Dmitry Eremin-Solenikov dbaryshkov@gmail.com +M: Alexander Aring alex.aring@gmail.com L: linux-zigbee-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://apps.sourceforge.net/trac/linux-zigbee T: git git://git.kernel.org/pub/scm/linux/kernel/git/lowpan/lowpan.git @@@ -4586,7 -4528,7 +4569,7 @@@ F: security/integrity/ima IMS TWINTURBO FRAMEBUFFER DRIVER L: linux-fbdev@vger.kernel.org S: Orphan -F: drivers/video/imsttfb.c +F: drivers/video/fbdev/imsttfb.c
INFINIBAND SUBSYSTEM M: Roland Dreier roland@kernel.org @@@ -4653,13 -4595,13 +4636,13 @@@ M: Maik Broemme <mbroemme@plusserver.de L: linux-fbdev@vger.kernel.org S: Maintained F: Documentation/fb/intelfb.txt -F: drivers/video/intelfb/ +F: drivers/video/fbdev/intelfb/
INTEL 810/815 FRAMEBUFFER DRIVER M: Antonino Daplas adaplas@gmail.com L: linux-fbdev@vger.kernel.org S: Maintained -F: drivers/video/i810/ +F: drivers/video/fbdev/i810/
INTEL MENLOW THERMAL DRIVER M: Sujith Thomas sujith.thomas@intel.com @@@ -5434,17 -5376,16 +5417,17 @@@ F: arch/powerpc/*/*/*virtex
LINUX FOR POWERPC EMBEDDED PPC8XX M: Vitaly Bordug vitb@kernel.crashing.org -M: Marcelo Tosatti marcelo@kvack.org W: http://www.penguinppc.org/ L: linuxppc-dev@lists.ozlabs.org S: Maintained F: arch/powerpc/platforms/8xx/
LINUX FOR POWERPC EMBEDDED PPC83XX AND PPC85XX +M: Scott Wood scottwood@freescale.com M: Kumar Gala galak@kernel.crashing.org W: http://www.penguinppc.org/ L: linuxppc-dev@lists.ozlabs.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux.git S: Maintained F: arch/powerpc/platforms/83xx/ F: arch/powerpc/platforms/85xx/ @@@ -5554,11 -5495,10 +5537,11 @@@ S: Maintaine F: arch/arm/mach-lpc32xx/
LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI) -M: Nagalakshmi Nandigama Nagalakshmi.Nandigama@lsi.com -M: Sreekanth Reddy Sreekanth.Reddy@lsi.com -M: support@lsi.com -L: DL-MPTFusionLinux@lsi.com +M: Nagalakshmi Nandigama nagalakshmi.nandigama@avagotech.com +M: Praveen Krishnamoorthy praveen.krishnamoorthy@avagotech.com +M: Sreekanth Reddy sreekanth.reddy@avagotech.com +M: Abhijit Mahajan abhijit.mahajan@avagotech.com +L: MPT-FusionLinux.pdl@avagotech.com L: linux-scsi@vger.kernel.org W: http://www.lsilogic.com/support S: Supported @@@ -5667,6 -5607,16 +5650,6 @@@ F: Documentation/networking/mac80211-in F: include/net/mac80211.h F: net/mac80211/
-MAC80211 PID RATE CONTROL -M: Stefano Brivio stefano.brivio@polimi.it -M: Mattias Nissler mattias.nissler@gmx.de -L: linux-wireless@vger.kernel.org -W: http://wireless.kernel.org/en/developers/Documentation/mac80211/RateControl/... -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git -S: Maintained -F: net/mac80211/rc80211_pid* - MACVLAN DRIVER M: Patrick McHardy kaber@trash.net L: netdev@vger.kernel.org @@@ -5730,7 -5680,7 +5713,7 @@@ F: drivers/mmc/host/mvsdio. MATROX FRAMEBUFFER DRIVER L: linux-fbdev@vger.kernel.org S: Orphan -F: drivers/video/matrox/matroxfb_* +F: drivers/video/fbdev/matrox/matroxfb_* F: include/uapi/linux/matroxfb.h
MAX16065 HARDWARE MONITOR DRIVER @@@ -6372,8 -6322,8 +6355,8 @@@ NVIDIA (rivafb and nvidiafb) FRAMEBUFFE M: Antonino Daplas adaplas@gmail.com L: linux-fbdev@vger.kernel.org S: Maintained -F: drivers/video/riva/ -F: drivers/video/nvidia/ +F: drivers/video/fbdev/riva/ +F: drivers/video/fbdev/nvidia/
NVM EXPRESS DRIVER M: Matthew Wilcox willy@linux.intel.com @@@ -6443,14 -6393,14 +6426,14 @@@ M: Tomi Valkeinen <tomi.valkeinen@ti.co L: linux-fbdev@vger.kernel.org L: linux-omap@vger.kernel.org S: Maintained -F: drivers/video/omap/ +F: drivers/video/fbdev/omap/
OMAP DISPLAY SUBSYSTEM and FRAMEBUFFER SUPPORT (DSS2) M: Tomi Valkeinen tomi.valkeinen@ti.com L: linux-omap@vger.kernel.org L: linux-fbdev@vger.kernel.org S: Maintained -F: drivers/video/omap2/ +F: drivers/video/fbdev/omap2/ F: Documentation/arm/OMAP/DSS
OMAP HARDWARE SPINLOCK SUPPORT @@@ -6741,7 -6691,7 +6724,7 @@@ F: drivers/char/agp/parisc-agp. F: drivers/input/serio/gscps2.c F: drivers/parport/parport_gsc.* F: drivers/tty/serial/8250/8250_gsc.c -F: drivers/video/sti* +F: drivers/video/fbdev/sti* F: drivers/video/console/sti* F: drivers/video/logo/logo_parisc*
@@@ -6801,7 -6751,7 +6784,7 @@@ F: arch/x86/kernel/quirks.
PCI DRIVER FOR IMX6 M: Richard Zhu r65037@freescale.com -M: Shawn Guo shawn.guo@linaro.org +M: Shawn Guo shawn.guo@freescale.com L: linux-pci@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@@ -6990,7 -6940,7 +6973,7 @@@ S: Maintaine T: git git://github.com/gxt/linux.git F: drivers/input/serio/i8042-unicore32io.h F: drivers/i2c/busses/i2c-puv3.c -F: drivers/video/fb-puv3.c +F: drivers/video/fbdev/fb-puv3.c F: drivers/rtc/rtc-puv3.c
PMBUS HARDWARE MONITORING DRIVERS @@@ -7022,10 -6972,10 +7005,10 @@@ POSIX CLOCKS and TIMER M: Thomas Gleixner tglx@linutronix.de L: linux-kernel@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core -S: Supported +S: Maintained F: fs/timerfd.c F: include/linux/timer* -F: kernel/*timer* +F: kernel/time/*timer*
POWER SUPPLY CLASS/SUBSYSTEM and DRIVERS M: Dmitry Eremin-Solenikov dbaryshkov@gmail.com @@@ -7239,12 -7189,6 +7222,12 @@@ M: Robert Jarzmik <robert.jarzmik@free. L: rtc-linux@googlegroups.com S: Maintained
+QAT DRIVER +M: Tadeusz Struk tadeusz.struk@intel.com +L: qat-linux@intel.com +S: Supported +F: drivers/crypto/qat/ + QIB DRIVER M: Mike Marciniszyn infinipath@intel.com L: linux-rdma@vger.kernel.org @@@ -7368,7 -7312,7 +7351,7 @@@ RADEON FRAMEBUFFER DISPLAY DRIVE M: Benjamin Herrenschmidt benh@kernel.crashing.org L: linux-fbdev@vger.kernel.org S: Maintained -F: drivers/video/aty/radeon* +F: drivers/video/fbdev/aty/radeon* F: include/uapi/linux/radeonfb.h
RADIOSHARK RADIO DRIVER @@@ -7390,7 -7334,7 +7373,7 @@@ RAGE128 FRAMEBUFFER DISPLAY DRIVE M: Paul Mackerras paulus@samba.org L: linux-fbdev@vger.kernel.org S: Maintained -F: drivers/video/aty/aty128fb.c +F: drivers/video/fbdev/aty/aty128fb.c
RALINK RT2X00 WIRELESS LAN DRIVER P: rt2x00 project @@@ -7432,14 -7376,10 +7415,14 @@@ L: linux-kernel@vger.kernel.or S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git F: Documentation/RCU/torture.txt -F: kernel/rcu/torture.c +F: kernel/rcu/rcutorture.c
RCUTORTURE TEST FRAMEWORK M: "Paul E. McKenney" paulmck@linux.vnet.ibm.com +M: Josh Triplett josh@joshtriplett.org +R: Steven Rostedt rostedt@goodmis.org +R: Mathieu Desnoyers mathieu.desnoyers@efficios.com +R: Lai Jiangshan laijs@cn.fujitsu.com L: linux-kernel@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git @@@ -7462,11 -7402,8 +7445,11 @@@ S: Supporte F: net/rds/
READ-COPY UPDATE (RCU) -M: Dipankar Sarma dipankar@in.ibm.com M: "Paul E. McKenney" paulmck@linux.vnet.ibm.com +M: Josh Triplett josh@joshtriplett.org +R: Steven Rostedt rostedt@goodmis.org +R: Mathieu Desnoyers mathieu.desnoyers@efficios.com +R: Lai Jiangshan laijs@cn.fujitsu.com L: linux-kernel@vger.kernel.org W: http://www.rdrop.com/users/paulmck/RCU/ S: Supported @@@ -7476,7 -7413,7 +7459,7 @@@ X: Documentation/RCU/torture.tx F: include/linux/rcu* X: include/linux/srcu.h F: kernel/rcu/ -X: kernel/rcu/torture.c +X: kernel/torture.c
REAL TIME CLOCK (RTC) SUBSYSTEM M: Alessandro Zummo a.zummo@towertech.it @@@ -7631,7 -7568,7 +7614,7 @@@ S3 SAVAGE FRAMEBUFFER DRIVE M: Antonino Daplas adaplas@gmail.com L: linux-fbdev@vger.kernel.org S: Maintained -F: drivers/video/savage/ +F: drivers/video/fbdev/savage/
S390 M: Martin Schwidefsky schwidefsky@de.ibm.com @@@ -7754,7 -7691,7 +7737,7 @@@ SAMSUNG FRAMEBUFFER DRIVE M: Jingoo Han jg1.han@samsung.com L: linux-fbdev@vger.kernel.org S: Maintained -F: drivers/video/s3c-fb.c +F: drivers/video/fbdev/s3c-fb.c
SAMSUNG MULTIFUNCTION DEVICE DRIVERS M: Sangbeom Kim sbkim73@samsung.com @@@ -8226,7 -8163,7 +8209,7 @@@ M: Thomas Winischhofer <thomas@winischh W: http://www.winischhofer.net/linuxsisvga.shtml S: Maintained F: Documentation/fb/sisfb.txt -F: drivers/video/sis/ +F: drivers/video/fbdev/sis/ F: include/video/sisfb.h
SIS USB2VGA DRIVER @@@ -8249,9 -8186,6 +8232,9 @@@ F: mm/sl?b SLEEPABLE READ-COPY UPDATE (SRCU) M: Lai Jiangshan laijs@cn.fujitsu.com M: "Paul E. McKenney" paulmck@linux.vnet.ibm.com +M: Josh Triplett josh@joshtriplett.org +R: Steven Rostedt rostedt@goodmis.org +R: Mathieu Desnoyers mathieu.desnoyers@efficios.com L: linux-kernel@vger.kernel.org W: http://www.rdrop.com/users/paulmck/RCU/ S: Supported @@@ -8335,7 -8269,7 +8318,7 @@@ SMSC UFX6000 and UFX7000 USB to VGA DRI M: Steve Glendinning steve.glendinning@shawell.net L: linux-fbdev@vger.kernel.org S: Maintained -F: drivers/video/smscufx.c +F: drivers/video/fbdev/smscufx.c
SOC-CAMERA V4L2 SUBSYSTEM M: Guennadi Liakhovetski g.liakhovetski@gmx.de @@@ -8541,6 -8475,12 +8524,6 @@@ L: devel@driverdev.osuosl.or S: Supported F: drivers/staging/
-STAGING - AGERE HERMES II and II.5 WIRELESS DRIVERS -M: Henk de Groot pe1dnn@amsat.org -S: Odd Fixes -F: drivers/staging/wlags49_h2/ -F: drivers/staging/wlags49_h25/ - STAGING - ASUS OLED M: Jakub Schmidtke sjakub@gmail.com S: Odd Fixes @@@ -8552,6 -8492,14 +8535,6 @@@ M: H Hartley Sweeten <hsweeten@visionen S: Odd Fixes F: drivers/staging/comedi/
-STAGING - CRYSTAL HD VIDEO DECODER -M: Naren Sankar nsankar@broadcom.com -M: Jarod Wilson jarod@wilsonet.com -M: Scott Davilla davilla@4pi.com -M: Manu Abraham abraham.manu@gmail.com -S: Odd Fixes -F: drivers/staging/crystalhd/ - STAGING - ECHO CANCELLER M: Steve Underwood steveu@coppice.org M: David Rowe david@rowetel.com @@@ -8670,6 -8618,11 +8653,6 @@@ M: Forest Bond <forest@alittletooquiet. S: Odd Fixes F: drivers/staging/vt665?/
-STAGING - WINBOND IS89C35 WLAN USB DRIVER -M: Pavel Machek pavel@ucw.cz -S: Odd Fixes -F: drivers/staging/winbond/ - STAGING - XGI Z7,Z9,Z11 PCI DISPLAY DRIVER M: Arnaud Patard arnaud.patard@rtp-net.org S: Odd Fixes @@@ -8995,7 -8948,7 +8978,7 @@@ F: drivers/media/radio/radio-raremono.
THERMAL M: Zhang Rui rui.zhang@intel.com -M: Eduardo Valentin eduardo.valentin@ti.com +M: Eduardo Valentin edubezval@gmail.com L: linux-pm@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal.git @@@ -9022,7 -8975,7 +9005,7 @@@ S: Maintaine F: drivers/platform/x86/thinkpad_acpi.c
TI BANDGAP AND THERMAL DRIVER -M: Eduardo Valentin eduardo.valentin@ti.com +M: Eduardo Valentin edubezval@gmail.com L: linux-pm@vger.kernel.org S: Supported F: drivers/thermal/ti-soc-thermal/ @@@ -9436,6 -9389,12 +9419,6 @@@ S: Maintaine F: drivers/usb/host/isp116x* F: include/linux/usb/isp116x.h
-USB KAWASAKI LSI DRIVER -M: Oliver Neukum oliver@neukum.org -L: linux-usb@vger.kernel.org -S: Maintained -F: drivers/usb/serial/kl5kusb105.* - USB MASS STORAGE DRIVER M: Matthew Dharm mdharm-usb@one-eyed-alien.net L: linux-usb@vger.kernel.org @@@ -9463,6 -9422,12 +9446,6 @@@ S: Maintaine F: Documentation/usb/ohci.txt F: drivers/usb/host/ohci*
-USB OPTION-CARD DRIVER -M: Matthias Urlichs smurf@smurf.noris.de -L: linux-usb@vger.kernel.org -S: Maintained -F: drivers/usb/serial/option.c - USB PEGASUS DRIVER M: Petko Manolov petkan@nucleusys.com L: linux-usb@vger.kernel.org @@@ -9495,7 -9460,7 +9478,7 @@@ S: Maintaine F: drivers/net/usb/rtl8150.c
USB SERIAL SUBSYSTEM -M: Johan Hovold jhovold@gmail.com +M: Johan Hovold johan@kernel.org L: linux-usb@vger.kernel.org S: Maintained F: Documentation/usb/usb-serial.txt @@@ -9636,7 -9601,7 +9619,7 @@@ L: linux-fbdev@vger.kernel.or W: http://dev.gentoo.org/~spock/projects/uvesafb/ S: Maintained F: Documentation/fb/uvesafb.txt -F: drivers/video/uvesafb.* +F: drivers/video/fbdev/uvesafb.*
VFAT/FAT/MSDOS FILESYSTEM M: OGAWA Hirofumi hirofumi@mail.parknet.co.jp @@@ -9709,7 -9674,7 +9692,7 @@@ S: Maintaine F: include/linux/via-core.h F: include/linux/via-gpio.h F: include/linux/via_i2c.h -F: drivers/video/via/ +F: drivers/video/fbdev/via/
VIA VELOCITY NETWORK DRIVER
M:	Francois Romieu <romieu@fr.zoreil.com>

diff --combined Makefile
index 39022dc,512c82f..86eb013
--- a/Makefile
+++ b/Makefile
@@@ -1,7 -1,7 +1,7 @@@
  VERSION = 3
  PATCHLEVEL = 16
  SUBLEVEL = 0
 -EXTRAVERSION = -rc3
 +EXTRAVERSION = -rc4
  NAME = Shuffling Zombie Juror
# *DOCUMENTATION* @@@ -41,29 -41,6 +41,29 @@@ unexport GREP_OPTION # descending is started. They are now explicitly listed as the # prepare rule.
+# Beautify output +# --------------------------------------------------------------------------- +# +# Normally, we echo the whole command before executing it. By making +# that echo $($(quiet)$(cmd)), we now have the possibility to set +# $(quiet) to choose other forms of output instead, e.g. +# +# quiet_cmd_cc_o_c = Compiling $(RELDIR)/$@ +# cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< +# +# If $(quiet) is empty, the whole command will be printed. +# If it is set to "quiet_", only the short version will be printed. +# If it is set to "silent_", nothing will be printed at all, since +# the variable $(silent_cmd_cc_o_c) doesn't exist. +# +# A simple variant is to prefix commands with $(Q) - that's useful +# for commands that shall be hidden in non-verbose mode. +# +# $(Q)ln $@ :< +# +# If KBUILD_VERBOSE equals 0 then the above command will be hidden. +# If KBUILD_VERBOSE equals 1 then the above command is displayed. +# # To put more focus on warnings, be less verbose as default # Use 'make V=1' to see the full commands
@@@ -74,29 -51,6 +74,29 @@@ ifndef KBUILD_VERBOS KBUILD_VERBOSE = 0 endif
+ifeq ($(KBUILD_VERBOSE),1)
+  quiet =
+  Q =
+else
+  quiet=quiet_
+  Q = @
+endif
+
+# If the user is running make -s (silent mode), suppress echoing of
+# commands
+
+ifneq ($(filter 4.%,$(MAKE_VERSION)),)	# make-4
+ifneq ($(filter %s ,$(firstword x$(MAKEFLAGS))),)
+  quiet=silent_
+endif
+else					# make-3.8x
+ifneq ($(filter s% -s%,$(MAKEFLAGS)),)
+  quiet=silent_
+endif
+endif
+
+export quiet Q KBUILD_VERBOSE
+
 # Call a source code checker (by default, "sparse") as part of the
 # C compilation.
 #
@@@ -172,13 -126,7 +172,13 @@@ PHONY += $(MAKECMDGOALS) sub-mak
 	$(filter-out _all sub-make $(CURDIR)/Makefile, $(MAKECMDGOALS))
 _all: sub-make
 	@:
+# Fake the "Entering directory" message once, so that IDEs/editors are +# able to understand relative filenames. + echodir := @echo + quiet_echodir := @echo +silent_echodir := @: sub-make: FORCE + $($(quiet)echodir) "make[1]: Entering directory `$(KBUILD_OUTPUT)'" $(if $(KBUILD_VERBOSE:1=),@)$(MAKE) -C $(KBUILD_OUTPUT) \ KBUILD_SRC=$(CURDIR) \ KBUILD_EXTMOD="$(KBUILD_EXTMOD)" -f $(CURDIR)/Makefile \ @@@ -341,6 -289,52 +341,6 @@@ endi export KBUILD_MODULES KBUILD_BUILTIN export KBUILD_CHECKSRC KBUILD_SRC KBUILD_EXTMOD
-# Beautify output -# --------------------------------------------------------------------------- -# -# Normally, we echo the whole command before executing it. By making -# that echo $($(quiet)$(cmd)), we now have the possibility to set -# $(quiet) to choose other forms of output instead, e.g. -# -# quiet_cmd_cc_o_c = Compiling $(RELDIR)/$@ -# cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< -# -# If $(quiet) is empty, the whole command will be printed. -# If it is set to "quiet_", only the short version will be printed. -# If it is set to "silent_", nothing will be printed at all, since -# the variable $(silent_cmd_cc_o_c) doesn't exist. -# -# A simple variant is to prefix commands with $(Q) - that's useful -# for commands that shall be hidden in non-verbose mode. -# -# $(Q)ln $@ :< -# -# If KBUILD_VERBOSE equals 0 then the above command will be hidden. -# If KBUILD_VERBOSE equals 1 then the above command is displayed. - -ifeq ($(KBUILD_VERBOSE),1) - quiet = - Q = -else - quiet=quiet_ - Q = @ -endif - -# If the user is running make -s (silent mode), suppress echoing of -# commands - -ifneq ($(filter 4.%,$(MAKE_VERSION)),) # make-4 -ifneq ($(filter %s ,$(firstword x$(MAKEFLAGS))),) - quiet=silent_ -endif -else # make-3.8x -ifneq ($(filter s% -s%,$(MAKEFLAGS)),) - quiet=silent_ -endif -endif - -export quiet Q KBUILD_VERBOSE - ifneq ($(CC),) ifeq ($(shell $(CC) -v 2>&1 | grep -c "clang version"), 1) COMPILER := clang @@@ -360,14 -354,9 +360,14 @@@ include $(srctree)/scripts/Kbuild.inclu # Make variables (CC, etc...) AS = $(CROSS_COMPILE)as LD = $(CROSS_COMPILE)ld +LDFINAL = $(LD) CC = $(CROSS_COMPILE)gcc CPP = $(CC) -E +ifdef CONFIG_LTO +AR = $(CROSS_COMPILE)gcc-ar +else AR = $(CROSS_COMPILE)ar +endif NM = $(CROSS_COMPILE)nm STRIP = $(CROSS_COMPILE)strip OBJCOPY = $(CROSS_COMPILE)objcopy @@@ -426,7 -415,7 +426,7 @@@ KERNELVERSION = $(VERSION)$(if $(PATCHL
export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC -export CPP AR NM STRIP OBJCOPY OBJDUMP +export CPP AR NM STRIP OBJCOPY OBJDUMP LDFINAL export MAKE AWK GENKSYMS INSTALLKERNEL PERL UTS_MACHINE export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
@@@ -437,17 -426,6 +437,17 @@@ export KBUILD_AFLAGS_MODULE KBUILD_CFLA export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL export KBUILD_ARFLAGS
+ifdef CONFIG_LTO +# LTO gcc creates a lot of files in TMPDIR, and with /tmp as tmpfs +# it's easy to drive the machine OOM. Use the object directory +# instead. +ifndef TMPDIR +TMPDIR ?= $(objtree) +export TMPDIR +$(info setting TMPDIR=$(objtree) for LTO build) +endif +endif + # When compiling out-of-tree modules, put MODVERDIR in the module # tree rather than in the kernel tree. The kernel tree might # even be read-only. @@@ -637,6 -615,9 +637,9 @@@ els KBUILD_CFLAGS += -O2 endif
+ # Tell gcc to never replace conditional load with a non-conditional one
+ KBUILD_CFLAGS	+= $(call cc-option,--param=allow-store-data-races=0)
+
 ifdef CONFIG_READABLE_ASM
 # Disable optimizations that make assembler listings hard to read.
 # reorder blocks reorders the control in the function
@@@ -767,7 -748,6 +770,7 @@@ ifeq ($(shell $(CONFIG_SHELL) $(srctree
 endif
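What --param=allow-store-data-races=0 forbids can be seen in a small C
sketch (illustrative only; whether a given gcc version actually performs
this transformation depends on its optimization settings):

	int flag;	/* shared with other threads */

	/* Source as written: the store happens only when cond is true. */
	void maybe_set(int cond)
	{
		if (cond)
			flag = 1;
	}

	/* A transformation permitted when store data races are allowed:
	 * the compiler writes to 'flag' unconditionally, re-storing the
	 * old value when cond is false.  That unconditional store races
	 * with, and can clobber, a concurrent update by another thread. */
	void maybe_set_racy(int cond)
	{
		int tmp = flag;
		flag = cond ? 1 : tmp;
	}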
include $(srctree)/scripts/Makefile.extrawarn +include ${srctree}/scripts/Makefile.lto
# Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments KBUILD_CPPFLAGS += $(KCPPFLAGS) @@@ -1193,7 -1173,7 +1196,7 @@@ distclean: mrprope # Packaging of the kernel to various formats # --------------------------------------------------------------------------- # rpm target kept for backward compatibility -package-dir := $(srctree)/scripts/package +package-dir := scripts/package
%src-pkg: FORCE $(Q)$(MAKE) $(build)=$(package-dir) $@ diff --combined arch/arm/Kconfig index ea73acc,b9679c8..4fc4744 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@@ -83,6 -83,7 +83,7 @@@ config AR http://www.arm.linux.org.uk/.
config ARM_HAS_SG_CHAIN + select ARCH_HAS_SG_CHAIN bool
config NEED_SG_DMA_LENGTH @@@ -320,6 -321,7 +321,6 @@@ config ARCH_INTEGRATO select HAVE_TCM select ICST select MULTI_IRQ_HANDLER - select NEED_MACH_MEMORY_H select PLAT_VERSATILE select SPARSE_IRQ select USE_OF @@@ -757,6 -759,42 +758,6 @@@ config ARCH_S3C64X help Samsung S3C64XX series based systems
-config ARCH_S5P64X0 - bool "Samsung S5P6440 S5P6450" - select ATAGS - select CLKDEV_LOOKUP - select CLKSRC_SAMSUNG_PWM - select CPU_V6 - select GENERIC_CLOCKEVENTS - select GPIO_SAMSUNG - select HAVE_S3C2410_I2C if I2C - select HAVE_S3C2410_WATCHDOG if WATCHDOG - select HAVE_S3C_RTC if RTC_CLASS - select NEED_MACH_GPIO_H - select SAMSUNG_ATAGS - select SAMSUNG_WDT_RESET - help - Samsung S5P64X0 CPU based systems, such as the Samsung SMDK6440, - SMDK6450. - -config ARCH_S5PC100 - bool "Samsung S5PC100" - select ARCH_REQUIRE_GPIOLIB - select ATAGS - select CLKDEV_LOOKUP - select CLKSRC_SAMSUNG_PWM - select CPU_V7 - select GENERIC_CLOCKEVENTS - select GPIO_SAMSUNG - select HAVE_S3C2410_I2C if I2C - select HAVE_S3C2410_WATCHDOG if WATCHDOG - select HAVE_S3C_RTC if RTC_CLASS - select NEED_MACH_GPIO_H - select SAMSUNG_ATAGS - select SAMSUNG_WDT_RESET - help - Samsung S5PC100 series based systems - config ARCH_S5PV210 bool "Samsung S5PV210/S5PC110" select ARCH_HAS_HOLES_MEMORYMODEL @@@ -967,6 -1005,10 +968,6 @@@ source "arch/arm/mach-s3c24xx/Kconfig
source "arch/arm/mach-s3c64xx/Kconfig"
-source "arch/arm/mach-s5p64x0/Kconfig" - -source "arch/arm/mach-s5pc100/Kconfig" - source "arch/arm/mach-s5pv210/Kconfig"
source "arch/arm/mach-exynos/Kconfig" @@@ -1528,7 -1570,7 +1529,7 @@@ source kernel/Kconfig.preemp
config HZ_FIXED int - default 200 if ARCH_EBSA110 || ARCH_S3C24XX || ARCH_S5P64X0 || \ + default 200 if ARCH_EBSA110 || ARCH_S3C24XX || \ ARCH_S5PV210 || ARCH_EXYNOS4 default AT91_TIMER_HZ if ARCH_AT91 default SHMOBILE_TIMER_HZ if ARCH_SHMOBILE_LEGACY @@@ -2153,6 -2195,7 +2154,6 @@@ menu "Power management options source "kernel/power/Kconfig"
config ARCH_SUSPEND_POSSIBLE - depends on !ARCH_S5PC100 depends on CPU_ARM920T || CPU_ARM926T || CPU_FEROCEON || CPU_SA1100 || \ CPU_V6 || CPU_V6K || CPU_V7 || CPU_V7M || CPU_XSC3 || CPU_XSCALE || CPU_MOHAWK def_bool y diff --combined arch/arm64/Kconfig index 7fc6e2e,65317fb..301aefd --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@@ -2,6 -2,7 +2,7 @@@ config ARM6 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_OPP + select ARCH_HAS_SG_CHAIN select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_OPTIONAL_GPIOLIB @@@ -10,7 -11,6 +11,7 @@@ select ARM_AMBA select ARM_ARCH_TIMER select ARM_GIC + select ARM_GIC_V3 select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK diff --combined arch/powerpc/Kconfig index fefe7c8,e5b9170..d5ecc91 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@@ -111,6 -111,7 +111,7 @@@ config PP select HAVE_DMA_API_DEBUG select HAVE_OPROFILE select HAVE_DEBUG_KMEMLEAK + select ARCH_HAS_SG_CHAIN select GENERIC_ATOMIC64 if PPC32 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select HAVE_PERF_EVENTS @@@ -414,7 -415,7 +415,7 @@@ config KEXE config CRASH_DUMP bool "Build a kdump crash kernel" depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP) - select RELOCATABLE if PPC64 || 44x || FSL_BOOKE + select RELOCATABLE if (PPC64 && !COMPILE_TEST) || 44x || FSL_BOOKE help Build a kernel suitable for use as a kdump capture kernel. The same kernel binary can be used as production kernel and dump @@@ -1017,7 -1018,6 +1018,7 @@@ endmen if PPC64 config RELOCATABLE bool "Build a relocatable kernel" + depends on !COMPILE_TEST select NONSTATIC_KERNEL help This builds a kernel image that is capable of running anywhere diff --combined arch/powerpc/kvm/book3s_64_mmu_hv.c index 2d154d9,a41e625..93218ae --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@@ -37,8 -37,6 +37,6 @@@ #include <asm/ppc-opcode.h> #include <asm/cputable.h>
- #include "book3s_hv_cma.h" - /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ #define MAX_LPID_970 63
@@@ -64,10 -62,10 +62,10 @@@ long kvmppc_alloc_hpt(struct kvm *kvm, }
kvm->arch.hpt_cma_alloc = 0; page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT)); if (page) { hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); + memset((void *)hpt, 0, (1 << order)); kvm->arch.hpt_cma_alloc = 1; }
@@@ -450,7 -448,7 +448,7 @@@ static int kvmppc_mmu_book3s_64_hv_xlat unsigned long slb_v; unsigned long pp, key; unsigned long v, gr; - unsigned long *hptep; + __be64 *hptep; int index; int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
@@@ -473,13 -471,13 +471,13 @@@ preempt_enable(); return -ENOENT; } - hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); - v = hptep[0] & ~HPTE_V_HVLOCK; + hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); + v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; gr = kvm->arch.revmap[index].guest_rpte;
/* Unlock the HPTE */ asm volatile("lwsync" : : : "memory"); - hptep[0] = v; + hptep[0] = cpu_to_be64(v); preempt_enable();
gpte->eaddr = eaddr; @@@ -583,8 -581,7 +581,8 @@@ int kvmppc_book3s_hv_page_fault(struct unsigned long ea, unsigned long dsisr) { struct kvm *kvm = vcpu->kvm; - unsigned long *hptep, hpte[3], r; + unsigned long hpte[3], r; + __be64 *hptep; unsigned long mmu_seq, psize, pte_size; unsigned long gpa_base, gfn_base; unsigned long gpa, gfn, hva, pfn; @@@ -607,16 -604,16 +605,16 @@@ if (ea != vcpu->arch.pgfault_addr) return RESUME_GUEST; index = vcpu->arch.pgfault_index; - hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); + hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); rev = &kvm->arch.revmap[index]; preempt_disable(); while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) cpu_relax(); - hpte[0] = hptep[0] & ~HPTE_V_HVLOCK; - hpte[1] = hptep[1]; + hpte[0] = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; + hpte[1] = be64_to_cpu(hptep[1]); hpte[2] = r = rev->guest_rpte; asm volatile("lwsync" : : : "memory"); - hptep[0] = hpte[0]; + hptep[0] = cpu_to_be64(hpte[0]); preempt_enable();
if (hpte[0] != vcpu->arch.pgfault_hpte[0] || @@@ -732,9 -729,8 +730,9 @@@ preempt_disable(); while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) cpu_relax(); - if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] || - rev->guest_rpte != hpte[2]) + if ((be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK) != hpte[0] || + be64_to_cpu(hptep[1]) != hpte[1] || + rev->guest_rpte != hpte[2]) /* HPTE has been changed under us; let the guest retry */ goto out_unlock; hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; @@@ -754,20 -750,20 +752,20 @@@ rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; r &= rcbits | ~(HPTE_R_R | HPTE_R_C);
- if (hptep[0] & HPTE_V_VALID) { + if (be64_to_cpu(hptep[0]) & HPTE_V_VALID) { /* HPTE was previously valid, so we need to invalidate it */ unlock_rmap(rmap); - hptep[0] |= HPTE_V_ABSENT; + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, index); /* don't lose previous R and C bits */ - r |= hptep[1] & (HPTE_R_R | HPTE_R_C); + r |= be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); } else { kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); }
- hptep[1] = r; + hptep[1] = cpu_to_be64(r); eieio(); - hptep[0] = hpte[0]; + hptep[0] = cpu_to_be64(hpte[0]); asm volatile("ptesync" : : : "memory"); preempt_enable(); if (page && hpte_is_writable(r)) @@@ -786,7 -782,7 +784,7 @@@ return ret;
out_unlock: - hptep[0] &= ~HPTE_V_HVLOCK; + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); preempt_enable(); goto out_put; } @@@ -862,7 -858,7 +860,7 @@@ static int kvm_unmap_rmapp(struct kvm * { struct revmap_entry *rev = kvm->arch.revmap; unsigned long h, i, j; - unsigned long *hptep; + __be64 *hptep; unsigned long ptel, psize, rcbits;
for (;;) { @@@ -878,11 -874,11 +876,11 @@@ * rmap chain lock. */ i = *rmapp & KVMPPC_RMAP_INDEX; - hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); - while (hptep[0] & HPTE_V_HVLOCK) + while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK) cpu_relax(); continue; } @@@ -901,14 -897,14 +899,14 @@@
/* Now check and modify the HPTE */ ptel = rev[i].guest_rpte; - psize = hpte_page_size(hptep[0], ptel); - if ((hptep[0] & HPTE_V_VALID) && + psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel); + if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && hpte_rpn(ptel, psize) == gfn) { if (kvm->arch.using_mmu_notifiers) - hptep[0] |= HPTE_V_ABSENT; + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); /* Harvest R and C */ - rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); + rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; if (rcbits & ~rev[i].guest_rpte) { rev[i].guest_rpte = ptel | rcbits; @@@ -916,7 -912,7 +914,7 @@@ } } unlock_rmap(rmapp); - hptep[0] &= ~HPTE_V_HVLOCK; + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); } return 0; } @@@ -963,7 -959,7 +961,7 @@@ static int kvm_age_rmapp(struct kvm *kv { struct revmap_entry *rev = kvm->arch.revmap; unsigned long head, i, j; - unsigned long *hptep; + __be64 *hptep; int ret = 0;
retry: @@@ -979,24 -975,23 +977,24 @@@
i = head = *rmapp & KVMPPC_RMAP_INDEX; do { - hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw;
/* If this HPTE isn't referenced, ignore it */ - if (!(hptep[1] & HPTE_R_R)) + if (!(be64_to_cpu(hptep[1]) & HPTE_R_R)) continue;
if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); - while (hptep[0] & HPTE_V_HVLOCK) + while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK) cpu_relax(); goto retry; }
/* Now check and modify the HPTE */ - if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { + if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && + (be64_to_cpu(hptep[1]) & HPTE_R_R)) { kvmppc_clear_ref_hpte(kvm, hptep, i); if (!(rev[i].guest_rpte & HPTE_R_R)) { rev[i].guest_rpte |= HPTE_R_R; @@@ -1004,7 -999,7 +1002,7 @@@ } ret = 1; } - hptep[0] &= ~HPTE_V_HVLOCK; + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); } while ((i = j) != head);
unlock_rmap(rmapp); @@@ -1038,7 -1033,7 +1036,7 @@@ static int kvm_test_age_rmapp(struct kv do { hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw; - if (hp[1] & HPTE_R_R) + if (be64_to_cpu(hp[1]) & HPTE_R_R) goto out; } while ((i = j) != head); } @@@ -1078,7 -1073,7 +1076,7 @@@ static int kvm_test_clear_dirty_npages( unsigned long head, i, j; unsigned long n; unsigned long v, r; - unsigned long *hptep; + __be64 *hptep; int npages_dirty = 0;
retry: @@@ -1094,8 -1089,7 +1092,8 @@@
i = head = *rmapp & KVMPPC_RMAP_INDEX; do { - hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + unsigned long hptep1; + hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw;
/* @@@ -1112,30 -1106,29 +1110,30 @@@ * Otherwise we need to do the tlbie even if C==0 in * order to pick up any delayed writeback of C. */ - if (!(hptep[1] & HPTE_R_C) && - (!hpte_is_writable(hptep[1]) || vcpus_running(kvm))) + hptep1 = be64_to_cpu(hptep[1]); + if (!(hptep1 & HPTE_R_C) && + (!hpte_is_writable(hptep1) || vcpus_running(kvm))) continue;
if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); - while (hptep[0] & HPTE_V_HVLOCK) + while (hptep[0] & cpu_to_be64(HPTE_V_HVLOCK)) cpu_relax(); goto retry; }
/* Now check and modify the HPTE */ - if (!(hptep[0] & HPTE_V_VALID)) + if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) continue;
/* need to make it temporarily absent so C is stable */ - hptep[0] |= HPTE_V_ABSENT; + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); - v = hptep[0]; - r = hptep[1]; + v = be64_to_cpu(hptep[0]); + r = be64_to_cpu(hptep[1]); if (r & HPTE_R_C) { - hptep[1] = r & ~HPTE_R_C; + hptep[1] = cpu_to_be64(r & ~HPTE_R_C); if (!(rev[i].guest_rpte & HPTE_R_C)) { rev[i].guest_rpte |= HPTE_R_C; note_hpte_modification(kvm, &rev[i]); @@@ -1148,7 -1141,7 +1146,7 @@@ } v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK); v |= HPTE_V_VALID; - hptep[0] = v; + hptep[0] = cpu_to_be64(v); } while ((i = j) != head);
unlock_rmap(rmapp); @@@ -1312,7 -1305,7 +1310,7 @@@ struct kvm_htab_ctx * Returns 1 if this HPT entry has been modified or has pending * R/C bit changes. */ -static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp) +static int hpte_dirty(struct revmap_entry *revp, __be64 *hptp) { unsigned long rcbits_unset;
@@@ -1321,14 -1314,13 +1319,14 @@@
/* Also need to consider changes in reference and changed bits */ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); - if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset)) + if ((be64_to_cpu(hptp[0]) & HPTE_V_VALID) && + (be64_to_cpu(hptp[1]) & rcbits_unset)) return 1;
return 0; }
-static long record_hpte(unsigned long flags, unsigned long *hptp, +static long record_hpte(unsigned long flags, __be64 *hptp, unsigned long *hpte, struct revmap_entry *revp, int want_valid, int first_pass) { @@@ -1343,10 -1335,10 +1341,10 @@@ return 0;
valid = 0; - if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) { + if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) { valid = 1; if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && - !(hptp[0] & HPTE_V_BOLTED)) + !(be64_to_cpu(hptp[0]) & HPTE_V_BOLTED)) valid = 0; } if (valid != want_valid) @@@ -1358,7 -1350,7 +1356,7 @@@ preempt_disable(); while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) cpu_relax(); - v = hptp[0]; + v = be64_to_cpu(hptp[0]);
/* re-evaluate valid and dirty from synchronized HPTE value */ valid = !!(v & HPTE_V_VALID); @@@ -1366,9 -1358,9 +1364,9 @@@
/* Harvest R and C into guest view if necessary */ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); - if (valid && (rcbits_unset & hptp[1])) { - revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) | - HPTE_GR_MODIFIED; + if (valid && (rcbits_unset & be64_to_cpu(hptp[1]))) { + revp->guest_rpte |= (be64_to_cpu(hptp[1]) & + (HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED; dirty = 1; }
@@@ -1387,13 -1379,13 +1385,13 @@@ revp->guest_rpte = r; } asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); - hptp[0] &= ~HPTE_V_HVLOCK; + hptp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); preempt_enable(); if (!(valid == want_valid && (first_pass || dirty))) ok = 0; } - hpte[0] = v; - hpte[1] = r; + hpte[0] = cpu_to_be64(v); + hpte[1] = cpu_to_be64(r); return ok; }
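The conversion running through these hunks is mechanical: HPT entries now
live in memory as big-endian __be64, so every load is wrapped in
be64_to_cpu() and every store in cpu_to_be64().  A minimal sketch of the
idiom (the helper names are hypothetical, not from the patch; __be64 and
the byte-order helpers come from the usual kernel headers, and
HPTE_V_HVLOCK is the lock bit used above):

	static inline unsigned long hpte_read_v(__be64 *hptep)
	{
		/* convert the big-endian word to CPU byte order */
		return be64_to_cpu(hptep[0]);
	}

	static inline void hpte_write_v(__be64 *hptep, unsigned long v)
	{
		/* convert back to big-endian before storing */
		hptep[0] = cpu_to_be64(v);
	}

	static inline void hpte_unlock(__be64 *hptep)
	{
		/* for pure flag masking, converting the constant once is
		 * enough — the same trick the patch uses when dropping
		 * HPTE_V_HVLOCK without touching the other bits */
		hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
	}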
@@@ -1403,7 -1395,7 +1401,7 @@@ static ssize_t kvm_htab_read(struct fil struct kvm_htab_ctx *ctx = file->private_data; struct kvm *kvm = ctx->kvm; struct kvm_get_htab_header hdr; - unsigned long *hptp; + __be64 *hptp; struct revmap_entry *revp; unsigned long i, nb, nw; unsigned long __user *lbuf; @@@ -1419,7 -1411,7 +1417,7 @@@ flags = ctx->flags;
i = ctx->index; - hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); revp = kvm->arch.revmap + i; lbuf = (unsigned long __user *)buf;
@@@ -1503,7 -1495,7 +1501,7 @@@ static ssize_t kvm_htab_write(struct fi unsigned long i, j; unsigned long v, r; unsigned long __user *lbuf; - unsigned long *hptp; + __be64 *hptp; unsigned long tmp[2]; ssize_t nb; long int err, ret; @@@ -1545,7 -1537,7 +1543,7 @@@ i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte) break;
- hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); lbuf = (unsigned long __user *)buf; for (j = 0; j < hdr.n_valid; ++j) { err = -EFAULT; @@@ -1557,7 -1549,7 +1555,7 @@@ lbuf += 2; nb += HPTE_SIZE;
- if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) kvmppc_do_h_remove(kvm, 0, i, 0, tmp); err = -EIO; ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r, @@@ -1583,7 -1575,7 +1581,7 @@@ }
for (j = 0; j < hdr.n_invalid; ++j) { - if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) kvmppc_do_h_remove(kvm, 0, i, 0, tmp); ++i; hptp += 2; diff --combined arch/powerpc/kvm/book3s_hv_builtin.c index 3b41447,6cf498a..329d7fd --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@@ -16,12 -16,14 +16,14 @@@ #include <linux/init.h> #include <linux/memblock.h> #include <linux/sizes.h> + #include <linux/cma.h>
#include <asm/cputable.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h>
- #include "book3s_hv_cma.h" + #define KVM_CMA_CHUNK_ORDER 18 + /* * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206) * should be power of 2. @@@ -43,6 -45,8 +45,8 @@@ static unsigned long kvm_cma_resv_rati unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */ EXPORT_SYMBOL_GPL(kvm_rma_pages);
+ static struct cma *kvm_cma; + /* Work out RMLS (real mode limit selector) field value for a given RMA size. Assumes POWER7 or PPC970. */ static inline int lpcr_rmls(unsigned long rma_size) @@@ -97,7 -101,7 +101,7 @@@ struct kvm_rma_info *kvm_alloc_rma( ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL); if (!ri) return NULL; - page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages); + page = cma_alloc(kvm_cma, kvm_rma_pages, get_order(kvm_rma_pages)); if (!page) goto err_out; atomic_set(&ri->use_count, 1); @@@ -112,7 -116,7 +116,7 @@@ EXPORT_SYMBOL_GPL(kvm_alloc_rma) void kvm_release_rma(struct kvm_rma_info *ri) { if (atomic_dec_and_test(&ri->use_count)) { - kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages); + cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages); kfree(ri); } } @@@ -131,16 -135,18 +135,18 @@@ struct page *kvm_alloc_hpt(unsigned lon { unsigned long align_pages = HPT_ALIGN_PAGES;
+ VM_BUG_ON(get_order(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT); + /* Old CPUs require HPT aligned on a multiple of its size */ if (!cpu_has_feature(CPU_FTR_ARCH_206)) align_pages = nr_pages; - return kvm_alloc_cma(nr_pages, align_pages); + return cma_alloc(kvm_cma, nr_pages, get_order(align_pages)); } EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
void kvm_release_hpt(struct page *page, unsigned long nr_pages) { - kvm_release_cma(page, nr_pages); + cma_release(kvm_cma, page, nr_pages); } EXPORT_SYMBOL_GPL(kvm_release_hpt);
@@@ -179,7 -185,8 +185,8 @@@ void __init kvm_cma_reserve(void align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size); - kvm_cma_declare_contiguous(selected_size, align_size); + cma_declare_contiguous(0, selected_size, 0, align_size, + KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma); } }
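This hunk replaces KVM's private CMA allocator with the generic CMA API
from linux/cma.h.  Its basic shape, reduced from the code above (a sketch;
the my_* names are hypothetical):

	#include <linux/cma.h>

	static struct cma *my_cma;

	/* boot time: reserve a contiguous region (base/limit 0 = anywhere) */
	void __init my_reserve(phys_addr_t size, phys_addr_t align)
	{
		cma_declare_contiguous(0, size, 0, align,
				       KVM_CMA_CHUNK_ORDER - PAGE_SHIFT,
				       false, &my_cma);
	}

	/* runtime: carve page ranges out of the reserved region */
	struct page *my_alloc(unsigned long nr_pages, unsigned long align_pages)
	{
		return cma_alloc(my_cma, nr_pages, get_order(align_pages));
	}

	void my_release(struct page *page, unsigned long nr_pages)
	{
		cma_release(my_cma, page, nr_pages);
	}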
@@@ -212,16 -219,3 +219,16 @@@ bool kvm_hv_mode_active(void
 {
 	return atomic_read(&hv_vm_count) != 0;
 }
+
+extern int hcall_real_table[], hcall_real_table_end[];
+
+int kvmppc_hcall_impl_hv_realmode(unsigned long cmd)
+{
+	cmd /= 4;
+	if (cmd < hcall_real_table_end - hcall_real_table &&
+	    hcall_real_table[cmd])
+		return 1;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);

diff --combined arch/x86/Kconfig
index be0d37e,77e7790..8657c06
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@@ -96,6 -96,7 +96,7 @@@ config X8
 	select IRQ_FORCED_THREADING
 	select HAVE_BPF_JIT if X86_64
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ 	select ARCH_HAS_SG_CHAIN
 	select CLKEVT_I8253
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_IOMAP
@@@ -536,7 -537,7 +537,7 @@@ config X86_32_IRI
config SCHED_OMIT_FRAME_POINTER def_bool y - prompt "Single-depth WCHAN output" + prompt "Single-depth WCHAN output" if !LTO && !FRAME_POINTER depends on X86 ---help--- Calculate simpler /proc/<PID>/wchan values. If this option @@@ -1576,6 -1577,9 +1577,9 @@@ source kernel/Kconfig.h
config KEXEC bool "kexec system call" + select BUILD_BIN2C + select CRYPTO + select CRYPTO_SHA256 ---help--- kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --combined arch/x86/Makefile index c65fd96,dc302a7..c1aa368 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@@ -15,9 -15,12 +15,9 @@@ endi # that way we can complain to the user if the CPU is insufficient. # # The -m16 option is supported by GCC >= 4.9 and clang >= 3.5. For -# older versions of GCC, we need to play evil and unreliable tricks to -# attempt to ensure that our asm(".code16gcc") is first in the asm -# output. -CODE16GCC_CFLAGS := -m32 -include $(srctree)/arch/x86/boot/code16gcc.h \ - $(call cc-option, -fno-toplevel-reorder,\ - $(call cc-option, -fno-unit-at-a-time)) +# older versions of GCC, include an *assembly* header to make sure that +# gcc doesn't play any games behind our back. +CODE16GCC_CFLAGS := -m32 -Wa,$(srctree)/arch/x86/boot/code16gcc.h M16_CFLAGS := $(call cc-option, -m16, $(CODE16GCC_CFLAGS))
REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -D__KERNEL__ \ @@@ -183,6 -186,14 +183,14 @@@ archscripts: scripts_basi archheaders: $(Q)$(MAKE) $(build)=arch/x86/syscalls all
+ archprepare: + ifeq ($(CONFIG_KEXEC),y) + # Build only for 64bit. No loaders for 32bit yet. + ifeq ($(CONFIG_X86_64),y) + $(Q)$(MAKE) $(build)=arch/x86/purgatory arch/x86/purgatory/kexec-purgatory.c + endif + endif + ### # Kernel objects
diff --combined arch/x86/mm/fault.c index 1dbade8,0193a32..939cb8c --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@@ -350,7 -350,7 +350,7 @@@ out
void vmalloc_sync_all(void) { - sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); + sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END, 0); }
/* @@@ -577,8 -577,6 +577,8 @@@ static int is_f00f_bug(struct pt_regs *
static const char nx_warning[] = KERN_CRIT "kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n"; +static const char smep_warning[] = KERN_CRIT +"unable to execute userspace code (SMEP?) (uid: %d)\n";
static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, @@@ -599,10 -597,6 +599,10 @@@
if (pte && pte_present(*pte) && !pte_exec(*pte)) printk(nx_warning, from_kuid(&init_user_ns, current_uid())); + if (pte && pte_present(*pte) && pte_exec(*pte) && + (pgd_flags(*pgd) & _PAGE_USER) && + (read_cr4() & X86_CR4_SMEP)) + printk(smep_warning, from_kuid(&init_user_ns, current_uid())); }
printk(KERN_ALERT "BUG: unable to handle kernel "); diff --combined arch/x86/purgatory/Makefile index 0000000,e5829dd..7fde9ee mode 000000,100644..100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@@ -1,0 -1,30 +1,30 @@@ + purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string.o + + targets += $(purgatory-y) + PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) + + LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib + targets += purgatory.ro + + # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That + # in turn leaves some undefined symbols like __fentry__ in purgatory and not + # sure how to relocate those. Like kexec-tools, use custom flags. + + KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large + + $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE + $(call if_changed,ld) + + targets += kexec-purgatory.c + + quiet_cmd_bin2c = BIN2C $@ - cmd_bin2c = cat $(obj)/purgatory.ro | $(srctree)/scripts/basic/bin2c kexec_purgatory > $(obj)/kexec-purgatory.c ++ cmd_bin2c = cat $(obj)/purgatory.ro | $(objtree)/scripts/basic/bin2c kexec_purgatory > $(obj)/kexec-purgatory.c + + $(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE + $(call if_changed,bin2c) + + + # No loaders for 32bits yet. + ifeq ($(CONFIG_X86_64),y) + obj-$(CONFIG_KEXEC) += kexec-purgatory.o + endif diff --combined drivers/base/Kconfig index 88500fe,9d5fed1..4e7f0ff --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@@ -149,21 -149,15 +149,21 @@@ config EXTRA_FIRMWARE_DI some other directory containing the firmware files.
config FW_LOADER_USER_HELPER + bool + +config FW_LOADER_USER_HELPER_FALLBACK bool "Fallback user-helper invocation for firmware loading" depends on FW_LOADER - default y + select FW_LOADER_USER_HELPER help This option enables / disables the invocation of user-helper (e.g. udev) for loading firmware files as a fallback after the direct file loading in kernel fails. The user-mode helper is no longer required unless you have a special firmware file that - resides in a non-standard path. + resides in a non-standard path. Moreover, the udev support has + been deprecated upstream. + + If you are unsure about this, say N here.
config DEBUG_DRIVER bool "Driver Core verbose debug messages" @@@ -214,15 -208,6 +214,15 @@@ config DMA_SHARED_BUFFE APIs extension; the file's descriptor can then be passed on to other driver.
+config FENCE_TRACE + bool "Enable verbose FENCE_TRACE messages" + depends on DMA_SHARED_BUFFER + help + Enable the FENCE_TRACE printks. This will add extra + spam to the console log, but will make it easier to diagnose + lockup related problems for dma-buffers shared across multiple + devices. + config DMA_CMA bool "DMA Contiguous Memory Allocator" depends on HAVE_DMA_CONTIGUOUS && CMA @@@ -289,16 -274,6 +289,6 @@@ config CMA_ALIGNMEN
If unsure, leave the default value "8".
- config CMA_AREAS - int "Maximum count of the CMA device-private areas" - default 7 - help - CMA allows to create CMA areas for particular devices. This parameter - sets the maximum number of such device private CMA areas in the - system. - - If unsure, leave the default value "7". - endif
endmenu diff --combined drivers/net/ethernet/intel/i40e/i40e_ethtool.c index df89b6c,c57b085..6237727 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@@ -1027,10 -1027,9 +1027,10 @@@ static int i40e_nway_reset(struct net_d struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_pf *pf = np->vsi->back; struct i40e_hw *hw = &pf->hw; + bool link_up = hw->phy.link_info.link_info & I40E_AQ_LINK_UP; i40e_status ret = 0;
- ret = i40e_aq_set_link_restart_an(hw, NULL); + ret = i40e_aq_set_link_restart_an(hw, link_up, NULL); if (ret) { netdev_info(netdev, "link restart failed, aq_err=%d\n", pf->hw.aq.asq_last_status); @@@ -1106,36 -1105,17 +1106,36 @@@ static int i40e_set_coalesce(struct net if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq) vsi->work_limit = ec->tx_max_coalesced_frames_irq;
+ vector = vsi->base_vector; if ((ec->rx_coalesce_usecs >= (I40E_MIN_ITR << 1)) && - (ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1))) + (ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1))) { vsi->rx_itr_setting = ec->rx_coalesce_usecs; - else + } else if (ec->rx_coalesce_usecs == 0) { + vsi->rx_itr_setting = ec->rx_coalesce_usecs; + i40e_irq_dynamic_disable(vsi, vector); + if (ec->use_adaptive_rx_coalesce) + netif_info(pf, drv, netdev, + "Rx-usecs=0, need to disable adaptive-Rx for a complete disable\n"); + } else { + netif_info(pf, drv, netdev, + "Invalid value, Rx-usecs range is 0, 8-8160\n"); return -EINVAL; + }
if ((ec->tx_coalesce_usecs >= (I40E_MIN_ITR << 1)) && - (ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1))) + (ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1))) { vsi->tx_itr_setting = ec->tx_coalesce_usecs; - else + } else if (ec->tx_coalesce_usecs == 0) { + vsi->tx_itr_setting = ec->tx_coalesce_usecs; + i40e_irq_dynamic_disable(vsi, vector); + if (ec->use_adaptive_tx_coalesce) + netif_info(pf, drv, netdev, + "Tx-usecs=0, need to disable adaptive-Tx for a complete disable\n"); + } else { + netif_info(pf, drv, netdev, + "Invalid value, Tx-usecs range is 0, 8-8160\n"); return -EINVAL; + }
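Both the Rx and Tx branches above implement one policy: 0 disables interrupt moderation for the ring (with a note if adaptive coalescing is still on), values inside the ITR window are taken verbatim, and anything else is rejected with -EINVAL. A standalone sketch of that validation, assuming the 8-8160 bounds quoted in the driver's own message (I40E_MIN_ITR << 1 and I40E_MAX_ITR << 1 in the real code):

#include <stdio.h>
#include <stdbool.h>

#define ITR_MIN 8	/* assumed from the "range is 0, 8-8160" message */
#define ITR_MAX 8160

/* Returns 0 on success, -1 for invalid input (the -EINVAL path). */
static int set_coalesce_usecs(unsigned int usecs, bool adaptive,
			      unsigned int *setting)
{
	if (usecs == 0) {
		*setting = 0;	/* moderation fully disabled */
		if (adaptive)
			fprintf(stderr, "usecs=0: also disable adaptive mode for a complete disable\n");
		return 0;
	}
	if (usecs >= ITR_MIN && usecs <= ITR_MAX) {
		*setting = usecs;
		return 0;
	}
	fprintf(stderr, "invalid value, range is 0, 8-8160\n");
	return -1;
}

int main(void)
{
	unsigned int itr;

	printf("%d\n", set_coalesce_usecs(0, true, &itr));	/* ok + nag */
	printf("%d\n", set_coalesce_usecs(100, false, &itr));	/* ok */
	printf("%d\n", set_coalesce_usecs(4, false, &itr));	/* rejected */
	return 0;
}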
if (ec->use_adaptive_rx_coalesce) vsi->rx_itr_setting |= I40E_ITR_DYNAMIC; @@@ -1147,6 -1127,7 +1147,6 @@@ else vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
- vector = vsi->base_vector; for (i = 0; i < vsi->num_q_vectors; i++, vector++) { q_vector = vsi->q_vectors[i]; q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); @@@ -1517,7 -1498,7 +1517,7 @@@ static int i40e_update_ethtool_fdir_ent
/* add filter to the list */ if (parent) - hlist_add_after(&parent->fdir_node, &input->fdir_node); + hlist_add_behind(&input->fdir_node, &parent->fdir_node); else hlist_add_head(&input->fdir_node, &pf->fdir_filter_list); diff --combined fs/cifs/cifssmb.c index b7e5b65,c3dc52e..0e706ab --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@@ -1734,7 -1734,10 +1734,7 @@@ CIFSSMBRead(const unsigned int xid, str
/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ if (*buf) { - if (resp_buf_type == CIFS_SMALL_BUFFER) - cifs_small_buf_release(iov[0].iov_base); - else if (resp_buf_type == CIFS_LARGE_BUFFER) - cifs_buf_release(iov[0].iov_base); + free_rsp_buf(resp_buf_type, iov[0].iov_base); } else if (resp_buf_type != CIFS_NO_BUFFER) { /* return buffer to caller to free */ *buf = iov[0].iov_base; @@@ -2200,7 -2203,10 +2200,7 @@@ CIFSSMBWrite2(const unsigned int xid, s }
/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ - if (resp_buf_type == CIFS_SMALL_BUFFER) - cifs_small_buf_release(iov[0].iov_base); - else if (resp_buf_type == CIFS_LARGE_BUFFER) - cifs_buf_release(iov[0].iov_base); + free_rsp_buf(resp_buf_type, iov[0].iov_base);
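This hunk, and several more below, collapse the repeated small-buffer/large-buffer release chain into one free_rsp_buf() call; the helper's body is visible later in this same diff, where a now-redundant static copy is deleted from fs/cifs/smb2pdu.c. A standalone sketch of the same tag-dispatched release pattern (the allocator and release functions are stand-ins, not the CIFS code):

#include <stdio.h>
#include <stdlib.h>

enum { NO_BUFFER, SMALL_BUFFER, LARGE_BUFFER };	/* stand-ins for CIFS_* */

static void small_buf_release(void *p) { puts("small buf released"); free(p); }
static void large_buf_release(void *p) { puts("large buf released"); free(p); }

/* One helper replaces the if/else chain previously repeated per call site. */
static void free_rsp_buf(int buftype, void *rsp)
{
	if (buftype == SMALL_BUFFER)
		small_buf_release(rsp);
	else if (buftype == LARGE_BUFFER)
		large_buf_release(rsp);
	/* NO_BUFFER: nothing was allocated, or the caller kept it */
}

int main(void)
{
	free_rsp_buf(SMALL_BUFFER, malloc(64));
	free_rsp_buf(LARGE_BUFFER, malloc(4096));
	free_rsp_buf(NO_BUFFER, NULL);
	return 0;
}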
/* Note: On -EAGAIN error only caller can retry on handle based calls since file handle passed in no longer valid */ @@@ -2424,14 -2430,14 +2424,14 @@@ CIFSSMBPosixLock(const unsigned int xid } parm_data = (struct cifs_posix_lock *) ((char *)&pSMBr->hdr.Protocol + data_offset); - if (parm_data->lock_type == __constant_cpu_to_le16(CIFS_UNLCK)) + if (parm_data->lock_type == cpu_to_le16(CIFS_UNLCK)) pLockData->fl_type = F_UNLCK; else { if (parm_data->lock_type == - __constant_cpu_to_le16(CIFS_RDLCK)) + cpu_to_le16(CIFS_RDLCK)) pLockData->fl_type = F_RDLCK; else if (parm_data->lock_type == - __constant_cpu_to_le16(CIFS_WRLCK)) + cpu_to_le16(CIFS_WRLCK)) pLockData->fl_type = F_WRLCK;
pLockData->fl_start = le64_to_cpu(parm_data->start); @@@ -2445,7 -2451,10 +2445,7 @@@ plk_err_exit if (pSMB) cifs_small_buf_release(pSMB);
- if (resp_buf_type == CIFS_SMALL_BUFFER) - cifs_small_buf_release(iov[0].iov_base); - else if (resp_buf_type == CIFS_LARGE_BUFFER) - cifs_buf_release(iov[0].iov_base); + free_rsp_buf(resp_buf_type, iov[0].iov_base);
/* Note: On -EAGAIN error only caller can retry on handle based calls since file handle passed in no longer valid */ @@@ -3223,25 -3232,25 +3223,25 @@@ CIFSSMB_set_compression(const unsigned pSMB->compression_state = cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
pSMB->TotalParameterCount = 0; - pSMB->TotalDataCount = __constant_cpu_to_le32(2); + pSMB->TotalDataCount = cpu_to_le32(2); pSMB->MaxParameterCount = 0; pSMB->MaxDataCount = 0; pSMB->MaxSetupCount = 4; pSMB->Reserved = 0; pSMB->ParameterOffset = 0; - pSMB->DataCount = __constant_cpu_to_le32(2); + pSMB->DataCount = cpu_to_le32(2); pSMB->DataOffset = cpu_to_le32(offsetof(struct smb_com_transaction_compr_ioctl_req, compression_state) - 4); /* 84 */ pSMB->SetupCount = 4; - pSMB->SubCommand = __constant_cpu_to_le16(NT_TRANSACT_IOCTL); + pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL); pSMB->ParameterCount = 0; - pSMB->FunctionCode = __constant_cpu_to_le32(FSCTL_SET_COMPRESSION); + pSMB->FunctionCode = cpu_to_le32(FSCTL_SET_COMPRESSION); pSMB->IsFsctl = 1; /* FSCTL */ pSMB->IsRootFlag = 0; pSMB->Fid = fid; /* file handle always le */ /* 3 byte pad, followed by 2 byte compress state */ - pSMB->ByteCount = __constant_cpu_to_le16(5); + pSMB->ByteCount = cpu_to_le16(5); inc_rfc1001_len(pSMB, 5);
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, @@@ -3377,10 -3386,10 +3377,10 @@@ static __u16 ACL_to_cifs_posix(char *pa cifs_acl->version = cpu_to_le16(1); if (acl_type == ACL_TYPE_ACCESS) { cifs_acl->access_entry_count = cpu_to_le16(count); - cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF); + cifs_acl->default_entry_count = cpu_to_le16(0xFFFF); } else if (acl_type == ACL_TYPE_DEFAULT) { cifs_acl->default_entry_count = cpu_to_le16(count); - cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF); + cifs_acl->access_entry_count = cpu_to_le16(0xFFFF); } else { cifs_dbg(FYI, "unknown ACL type %d\n", acl_type); return 0; @@@ -3829,7 -3838,10 +3829,7 @@@ CIFSSMBGetCIFSACL(const unsigned int xi } } qsec_out: - if (buf_type == CIFS_SMALL_BUFFER) - cifs_small_buf_release(iov[0].iov_base); - else if (buf_type == CIFS_LARGE_BUFFER) - cifs_buf_release(iov[0].iov_base); + free_rsp_buf(buf_type, iov[0].iov_base); /* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ return rc; } diff --combined fs/cifs/sess.c index 36d0b90,27e6175..db6ecdf --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@@ -46,7 -46,7 +46,7 @@@ static __u32 cifs_ssetup_hdr(struct cif CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4, USHRT_MAX)); pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq); - pSMB->req.VcNumber = __constant_cpu_to_le16(1); + pSMB->req.VcNumber = cpu_to_le16(1);
/* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
@@@ -520,240 -520,6 +520,240 @@@ select_sectype(struct TCP_Server_Info * } }
+struct sess_data { + unsigned int xid; + struct cifs_ses *ses; + struct nls_table *nls_cp; + void (*func)(struct sess_data *); + int result; + + /* we will send the SMB in three pieces: + * a fixed length beginning part, an optional + * SPNEGO blob (which can be zero length), and a + * last part which will include the strings + * and rest of bcc area. This allows us to avoid + * a large buffer 17K allocation + */ + int buf0_type; + struct kvec iov[3]; +}; + +static int +sess_alloc_buffer(struct sess_data *sess_data, int wct) +{ + int rc; + struct cifs_ses *ses = sess_data->ses; + struct smb_hdr *smb_buf; + + rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses, + (void **)&smb_buf); + + if (rc) + return rc; + + sess_data->iov[0].iov_base = (char *)smb_buf; + sess_data->iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4; + /* + * This variable will be used to clear the buffer + * allocated above in case of any error in the calling function. + */ + sess_data->buf0_type = CIFS_SMALL_BUFFER; + + /* 2000 big enough to fit max user, domain, NOS name etc. */ + sess_data->iov[2].iov_base = kmalloc(2000, GFP_KERNEL); + if (!sess_data->iov[2].iov_base) { + rc = -ENOMEM; + goto out_free_smb_buf; + } + + return 0; + +out_free_smb_buf: + kfree(smb_buf); + sess_data->iov[0].iov_base = NULL; + sess_data->iov[0].iov_len = 0; + sess_data->buf0_type = CIFS_NO_BUFFER; + return rc; +} + +static void +sess_free_buffer(struct sess_data *sess_data) +{ + + free_rsp_buf(sess_data->buf0_type, sess_data->iov[0].iov_base); + sess_data->buf0_type = CIFS_NO_BUFFER; + kfree(sess_data->iov[2].iov_base); +} + +static int +sess_establish_session(struct sess_data *sess_data) +{ + struct cifs_ses *ses = sess_data->ses; + + mutex_lock(&ses->server->srv_mutex); + if (!ses->server->session_estab) { + if (ses->server->sign) { + ses->server->session_key.response = + kmemdup(ses->auth_key.response, + ses->auth_key.len, GFP_KERNEL); + if (!ses->server->session_key.response) { + mutex_unlock(&ses->server->srv_mutex); + return -ENOMEM; + } + ses->server->session_key.len = + ses->auth_key.len; + } + ses->server->sequence_number = 0x2; + ses->server->session_estab = true; + } + mutex_unlock(&ses->server->srv_mutex); + + cifs_dbg(FYI, "CIFS session established successfully\n"); + spin_lock(&GlobalMid_Lock); + ses->status = CifsGood; + ses->need_reconnect = false; + spin_unlock(&GlobalMid_Lock); + + return 0; +} + +static int +sess_sendreceive(struct sess_data *sess_data) +{ + int rc; + struct smb_hdr *smb_buf = (struct smb_hdr *) sess_data->iov[0].iov_base; + __u16 count; + + count = sess_data->iov[1].iov_len + sess_data->iov[2].iov_len; + smb_buf->smb_buf_length = + cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count); + put_bcc(count, smb_buf); + + rc = SendReceive2(sess_data->xid, sess_data->ses, + sess_data->iov, 3 /* num_iovecs */, + &sess_data->buf0_type, + CIFS_LOG_ERROR); + + return rc; +} + +/* + * LANMAN and plaintext are less secure and off by default. + * So we make this explicitly be turned on in kconfig (in the + * build) and turned on at runtime (changed from the default) + * in proc/fs/cifs or via mount parm. Unfortunately this is + * needed for old Win (e.g. 
Win95), some obscure NAS and OS/2 + */ +#ifdef CONFIG_CIFS_WEAK_PW_HASH +static void +sess_auth_lanman(struct sess_data *sess_data) +{ + int rc = 0; + struct smb_hdr *smb_buf; + SESSION_SETUP_ANDX *pSMB; + char *bcc_ptr; + struct cifs_ses *ses = sess_data->ses; + char lnm_session_key[CIFS_AUTH_RESP_SIZE]; + __u32 capabilities; + __u16 bytes_remaining; + + /* lanman 2 style sessionsetup */ + /* wct = 10 */ + rc = sess_alloc_buffer(sess_data, 10); + if (rc) + goto out; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + bcc_ptr = sess_data->iov[2].iov_base; + capabilities = cifs_ssetup_hdr(ses, pSMB); + + pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; + + /* no capabilities flags in old lanman negotiation */ + pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE); + + /* Calculate hash with password and copy into bcc_ptr. + * Encryption Key (stored as in cryptkey) gets used if the + * security mode bit in Negottiate Protocol response states + * to use challenge/response method (i.e. Password bit is 1). + */ + rc = calc_lanman_hash(ses->password, ses->server->cryptkey, + ses->server->sec_mode & SECMODE_PW_ENCRYPT ? + true : false, lnm_session_key); + + memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE); + bcc_ptr += CIFS_AUTH_RESP_SIZE; + + /* + * can not sign if LANMAN negotiated so no need + * to calculate signing key? but what if server + * changed to do higher than lanman dialect and + * we reconnected would we ever calc signing_key? + */ + + cifs_dbg(FYI, "Negotiating LANMAN setting up strings\n"); + /* Unicode not allowed for LANMAN dialects */ + ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp); + + sess_data->iov[2].iov_len = (long) bcc_ptr - + (long) sess_data->iov[2].iov_base; + + rc = sess_sendreceive(sess_data); + if (rc) + goto out; + + pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base; + smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base; + + /* lanman response has a word count of 3 */ + if (smb_buf->WordCount != 3) { + rc = -EIO; + cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); + goto out; + } + + if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN) + cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */ + + ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ + cifs_dbg(FYI, "UID = %llu\n", ses->Suid); + + bytes_remaining = get_bcc(smb_buf); + bcc_ptr = pByteArea(smb_buf); + + /* BB check if Unicode and decode strings */ + if (bytes_remaining == 0) { + /* no string area to decode, do nothing */ + } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) { + /* unicode string area must be word-aligned */ + if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) { + ++bcc_ptr; + --bytes_remaining; + } + decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } else { + decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, + sess_data->nls_cp); + } + + rc = sess_establish_session(sess_data); +out: + sess_data->result = rc; + sess_data->func = NULL; + sess_free_buffer(sess_data); +} + +#else + +static void +sess_auth_lanman(struct sess_data *sess_data) +{ + sess_data->result = -EOPNOTSUPP; + sess_data->func = NULL; +} +#endif + int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, const struct nls_table *nls_cp) @@@ -774,21 -540,12 +774,21 @@@ __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ u16 blob_len; char *ntlmsspblob = NULL; + struct sess_data *sess_data;
if (ses == NULL) { WARN(1, "%s: ses == NULL!", __func__); return -EINVAL; }
+ sess_data = kzalloc(sizeof(struct sess_data), GFP_KERNEL); + if (!sess_data) + return -ENOMEM; + sess_data->xid = xid; + sess_data->ses = ses; + sess_data->buf0_type = CIFS_NO_BUFFER; + sess_data->nls_cp = (struct nls_table *) nls_cp; + type = select_sectype(ses->server, ses->sectype); cifs_dbg(FYI, "sess setup type %d\n", type); if (type == Unspecified) { @@@ -797,14 -554,6 +797,14 @@@ return -EINVAL; }
+ switch (type) { + case LANMAN: + sess_auth_lanman(sess_data); + goto out; + default: + cifs_dbg(FYI, "Continuing with CIFS_SessSetup\n"); + } + if (type == RawNTLMSSP) { /* if memory allocation is successful, caller of this function * frees it. @@@ -820,7 -569,17 +820,7 @@@ ssetup_ntlmssp_authenticate if (phase == NtLmChallenge) phase = NtLmAuthenticate; /* if ntlmssp, now final phase */
- if (type == LANMAN) { -#ifndef CONFIG_CIFS_WEAK_PW_HASH - /* LANMAN and plaintext are less secure and off by default. - So we make this explicitly be turned on in kconfig (in the - build) and turned on at runtime (changed from the default) - in proc/fs/cifs or via mount parm. Unfortunately this is - needed for old Win (e.g. Win95), some obscure NAS and OS/2 */ - return -EOPNOTSUPP; -#endif - wct = 10; /* lanman 2 style sessionsetup */ - } else if ((type == NTLM) || (type == NTLMv2)) { + if ((type == NTLM) || (type == NTLMv2)) { /* For NTLMv2 failures eventually may need to retry NTLM */ wct = 13; /* old style NTLM sessionsetup */ } else /* same size: negotiate or auth, NTLMSSP or extended security */ @@@ -859,7 -618,39 +859,7 @@@ iov[1].iov_base = NULL; iov[1].iov_len = 0;
- if (type == LANMAN) { -#ifdef CONFIG_CIFS_WEAK_PW_HASH - char lnm_session_key[CIFS_AUTH_RESP_SIZE]; - - pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; - - /* no capabilities flags in old lanman negotiation */ - - pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE); - - /* Calculate hash with password and copy into bcc_ptr. - * Encryption Key (stored as in cryptkey) gets used if the - * security mode bit in Negottiate Protocol response states - * to use challenge/response method (i.e. Password bit is 1). - */ - - rc = calc_lanman_hash(ses->password, ses->server->cryptkey, - ses->server->sec_mode & SECMODE_PW_ENCRYPT ? - true : false, lnm_session_key); - - memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE); - bcc_ptr += CIFS_AUTH_RESP_SIZE; - - /* can not sign if LANMAN negotiated so no need - to calculate signing key? but what if server - changed to do higher than lanman dialect and - we reconnected would we ever calc signing_key? */ - - cifs_dbg(FYI, "Negotiating LANMAN setting up strings\n"); - /* Unicode not allowed for LANMAN dialects */ - ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); -#endif - } else if (type == NTLM) { + if (type == NTLM) { pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); pSMB->req_no_secext.CaseInsensitivePasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE); @@@ -1098,6 -889,7 +1098,6 @@@ } if (phase == NtLmChallenge) { rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses); - /* now goto beginning for ntlmssp authenticate phase */ if (rc) goto ssetup_exit; } @@@ -1170,9 -962,4 +1170,9 @@@ keycp_exit kfree(ses->ntlmssp);
return rc; + +out: + rc = sess_data->result; + kfree(sess_data); + return rc; } diff --combined fs/cifs/smb2pdu.c index 0158104,a9b03c2..b3f05b4 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@@ -309,6 -309,16 +309,6 @@@ small_smb2_init(__le16 smb2_command, st return rc; }
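The new sess_data structure carries a func pointer plus a result field: each handler (sess_auth_lanman above; presumably the other authentication types in follow-up patches) either chains to the next phase or clears func and stores its result, so a driver loop only has to keep calling func until it goes NULL. A minimal standalone sketch of that dispatch style (the two phases are invented for illustration):

#include <stdio.h>

struct sess_data {
	void (*func)(struct sess_data *);	/* next phase, NULL when done */
	int result;
	int step;				/* illustrative state */
};

static void phase_done(struct sess_data *sd)
{
	sd->result = 0;
	sd->func = NULL;	/* handlers clear func to stop the machine */
}

static void phase_negotiate(struct sess_data *sd)
{
	printf("negotiate, step %d\n", sd->step++);
	sd->func = phase_done;	/* chain to the next phase */
}

int main(void)
{
	struct sess_data sd = { .func = phase_negotiate };

	while (sd.func)		/* the whole driver loop */
		sd.func(&sd);
	printf("result=%d\n", sd.result);
	return 0;
}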
-static void -free_rsp_buf(int resp_buftype, void *rsp) -{ - if (resp_buftype == CIFS_SMALL_BUFFER) - cifs_small_buf_release(rsp); - else if (resp_buftype == CIFS_LARGE_BUFFER) - cifs_buf_release(rsp); -} - - /* * * SMB2 Worker functions follow: @@@ -1359,7 -1369,7 +1359,7 @@@ SMB2_set_compression(const unsigned in char *ret_data = NULL;
fsctl_input.CompressionState = - __constant_cpu_to_le16(COMPRESSION_FORMAT_DEFAULT); + cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, FSCTL_SET_COMPRESSION, true /* is_fsctl */, diff --combined fs/namespace.c index b10db3d,2a1447c..019ff81 --- a/fs/namespace.c +++ b/fs/namespace.c @@@ -225,7 -225,6 +225,7 @@@ static struct mount *alloc_vfsmnt(cons INIT_LIST_HEAD(&mnt->mnt_share); INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); + INIT_LIST_HEAD(&mnt->mnt_mp_list); #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); #endif @@@ -668,45 -667,11 +668,45 @@@ struct vfsmount *lookup_mnt(struct pat return m; }
-static struct mountpoint *new_mountpoint(struct dentry *dentry) +/* + * __is_local_mountpoint - Test to see if dentry is a mountpoint in the + * current mount namespace. + * + * The common case is that dentries are not mountpoints at all, and that + * test is handled inline. For the slow case when we are actually + * dealing with a mountpoint of some kind, walk through all of the + * mounts in the current mount namespace and test to see if the dentry + * is a mountpoint. + * + * The mount_hashtable is not usable in this context because we + * need to identify all mounts that may be in the current mount + * namespace, not just a mount that happens to have some specified + * parent mount. + */ +bool __is_local_mountpoint(struct dentry *dentry) +{ + struct mnt_namespace *ns = current->nsproxy->mnt_ns; + struct mount *mnt; + bool is_covered = false; + + if (!d_mountpoint(dentry)) + goto out; + + down_read(&namespace_sem); + list_for_each_entry(mnt, &ns->list, mnt_list) { + is_covered = (mnt->mnt_mountpoint == dentry); + if (is_covered) + break; + } + up_read(&namespace_sem); +out: + return is_covered; +} + +static struct mountpoint *lookup_mountpoint(struct dentry *dentry) { struct hlist_head *chain = mp_hash(dentry); struct mountpoint *mp; - int ret;
hlist_for_each_entry(mp, chain, m_hash) { if (mp->m_dentry == dentry) { @@@ -717,14 -682,6 +717,14 @@@ return mp; } } + return NULL; +} + +static struct mountpoint *new_mountpoint(struct dentry *dentry) +{ + struct hlist_head *chain = mp_hash(dentry); + struct mountpoint *mp; + int ret;
mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); if (!mp) @@@ -739,7 -696,6 +739,7 @@@ mp->m_dentry = dentry; mp->m_count = 1; hlist_add_head(&mp->m_hash, chain); + INIT_LIST_HEAD(&mp->m_list); return mp; }
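Splitting the old new_mountpoint() into lookup_mountpoint() (which never allocates) and a pure allocator lets callers spell out "find the existing mountpoint, else create one", which is exactly what the lock_mount() hunk near the end of this file now does. A standalone sketch of that lookup-or-create split over a toy list (names are illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct entry { char key[32]; struct entry *next; };

static struct entry *table;	/* one illustrative hash chain */

static struct entry *lookup_entry(const char *key)
{
	struct entry *e;

	for (e = table; e; e = e->next)
		if (strcmp(e->key, key) == 0)
			return e;
	return NULL;		/* lookup never allocates */
}

static struct entry *new_entry(const char *key)
{
	struct entry *e = calloc(1, sizeof(*e));

	if (!e)
		return NULL;
	snprintf(e->key, sizeof(e->key), "%s", key);
	e->next = table;
	table = e;
	return e;
}

int main(void)
{
	/* The caller-side shape from the lock_mount() hunk below. */
	struct entry *e = lookup_entry("/mnt");

	if (!e)
		e = new_entry("/mnt");
	if (!e)
		return 1;
	printf("%s -> %p\n", e->key, (void *)e);
	return 0;
}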
@@@ -747,7 -703,6 +747,7 @@@ static void put_mountpoint(struct mount { if (!--mp->m_count) { struct dentry *dentry = mp->m_dentry; + BUG_ON(!list_empty(&mp->m_list)); spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_MOUNTED; spin_unlock(&dentry->d_lock); @@@ -794,7 -749,6 +794,7 @@@ static void detach_mnt(struct mount *mn mnt->mnt_mountpoint = mnt->mnt.mnt_root; list_del_init(&mnt->mnt_child); hlist_del_init_rcu(&mnt->mnt_hash); + list_del_init(&mnt->mnt_mp_list); put_mountpoint(mnt->mnt_mp); mnt->mnt_mp = NULL; } @@@ -811,7 -765,6 +811,7 @@@ void mnt_set_mountpoint(struct mount *m child_mnt->mnt_mountpoint = dget(mp->m_dentry); child_mnt->mnt_parent = mnt; child_mnt->mnt_mp = mp; + list_add_tail(&child_mnt->mnt_mp_list, &mp->m_list); }
/* @@@ -845,7 -798,7 +845,7 @@@ static void commit_tree(struct mount *m list_splice(&head, n->list.prev);
if (shadows) - hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash); + hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash); else hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mnt->mnt_mountpoint)); @@@ -983,25 -936,9 +983,25 @@@ static struct mount *clone_mnt(struct m return ERR_PTR(err); }
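Here and in the i40e fdir hunk above, hlist_add_after()/hlist_add_after_rcu() become hlist_add_behind()/hlist_add_behind_rcu(), and the argument order flips: the node being inserted now comes first, followed by the existing node it goes behind. A toy standalone version with the new convention (simplified to a singly linked node, without the real hlist's pprev back-pointers):

#include <stdio.h>

struct hnode { int val; struct hnode *next; };

/* New calling convention: node being added first, existing anchor second. */
static void hlist_add_behind(struct hnode *n, struct hnode *prev)
{
	n->next = prev->next;
	prev->next = n;
}

int main(void)
{
	struct hnode a = { 1, NULL }, b = { 2, NULL }, c = { 3, NULL };
	struct hnode *p;

	a.next = &c;			/* list: 1 -> 3 */
	hlist_add_behind(&b, &a);	/* list: 1 -> 2 -> 3 */

	for (p = &a; p; p = p->next)
		printf("%d ", p->val);
	printf("\n");
	return 0;
}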
+static void cleanup_mnt(struct mount *mnt) +{ + fsnotify_vfsmount_delete(&mnt->mnt); + dput(mnt->mnt.mnt_root); + deactivate_super(mnt->mnt.mnt_sb); + mnt_free_id(mnt); + complete(mnt->mnt_undone); + call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt); +} + +static void cleanup_mnt_work(struct work_struct *work) +{ + cleanup_mnt(container_of(work, struct mount, mnt_cleanup_work)); +} + static void mntput_no_expire(struct mount *mnt) { -put_again: + struct completion undone; + rcu_read_lock(); mnt_add_count(mnt, -1); if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ @@@ -1015,15 -952,12 +1015,15 @@@ return; } if (unlikely(mnt->mnt_pinned)) { - mnt_add_count(mnt, mnt->mnt_pinned + 1); + init_completion(&undone); + mnt->mnt_undone = &undone; + mnt_add_count(mnt, mnt->mnt_pinned); mnt->mnt_pinned = 0; rcu_read_unlock(); unlock_mount_hash(); acct_auto_close_mnt(&mnt->mnt); - goto put_again; + wait_for_completion(&undone); + return; } if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { rcu_read_unlock(); @@@ -1047,19 -981,11 +1047,19 @@@ * so mnt_get_writers() below is safe. */ WARN_ON(mnt_get_writers(mnt)); - fsnotify_vfsmount_delete(&mnt->mnt); - dput(mnt->mnt.mnt_root); - deactivate_super(mnt->mnt.mnt_sb); - mnt_free_id(mnt); - call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt); + /* The stack may be deep here, cleanup the mount on a work + * queue where the stack is guaranteed to be shallow. + */ + init_completion(&undone); + if (!mnt->mnt_undone) + mnt->mnt_undone = &undone; + else + complete(&undone); + + INIT_WORK(&mnt->mnt_cleanup_work, cleanup_mnt_work); + schedule_work(&mnt->mnt_cleanup_work); + + wait_for_completion(&undone); }
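The rewritten mntput_no_expire() pushes the heavy teardown into a work item and blocks on a completion until cleanup_mnt() signals it, trading stack depth for a context switch, per the comment about deep stacks. A userspace analog of that shape, with a thread standing in for the workqueue and a mutex/condvar pair standing in for the kernel completion (build with -pthread; this is not kernel API):

#include <pthread.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's struct completion. */
struct completion {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int done;
};

static void complete(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	c->done = 1;
	pthread_cond_signal(&c->cond);
	pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	while (!c->done)
		pthread_cond_wait(&c->cond, &c->lock);
	pthread_mutex_unlock(&c->lock);
}

static void *cleanup_work(void *arg)
{
	printf("deep teardown runs on the worker's shallow stack\n");
	complete(arg);		/* let the original caller proceed */
	return NULL;
}

int main(void)
{
	struct completion undone = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0
	};
	pthread_t worker;

	pthread_create(&worker, NULL, cleanup_work, &undone);
	wait_for_completion(&undone);	/* mirrors mntput_no_expire() */
	pthread_join(worker, NULL);
	return 0;
}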
void mntput(struct vfsmount *mnt) @@@ -1335,7 -1261,6 +1335,7 @@@ void umount_tree(struct mount *mnt, in p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; list_del_init(&p->mnt_child); if (mnt_has_parent(p)) { + list_del_init(&p->mnt_mp_list); put_mountpoint(p->mnt_mp); /* move the reference to mountpoint into ->mnt_ex_mountpoint */ p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint; @@@ -1448,37 -1373,6 +1448,37 @@@ static int do_umount(struct mount *mnt return retval; }
+/* + * __detach_mounts - lazily unmount all mounts on the specified dentry + * + * During unlink, rmdir, and d_drop it is possible to lose the path + * to an existing mountpoint, and wind up leaking the mount. + * detach_mounts allows lazily unmounting those mounts instead of + * leaking them. + * + * The caller may hold dentry->d_inode->i_mutex. + */ +void __detach_mounts(struct dentry *dentry) +{ + struct mountpoint *mp; + struct mount *mnt; + + namespace_lock(); + mp = lookup_mountpoint(dentry); + if (!mp) + goto out_unlock; + + lock_mount_hash(); + while (!list_empty(&mp->m_list)) { + mnt = list_first_entry(&mp->m_list, struct mount, mnt_mp_list); + umount_tree(mnt, 2); + } + unlock_mount_hash(); + put_mountpoint(mp); +out_unlock: + namespace_unlock(); +} + /* * Is the caller allowed to modify his namespace? */ @@@ -1828,9 -1722,7 +1828,9 @@@ retry namespace_lock(); mnt = lookup_mnt(path); if (likely(!mnt)) { - struct mountpoint *mp = new_mountpoint(dentry); + struct mountpoint *mp = lookup_mountpoint(dentry); + if (!mp) + mp = new_mountpoint(dentry); if (IS_ERR(mp)) { namespace_unlock(); mutex_unlock(&dentry->d_inode->i_mutex); diff --combined fs/proc/base.c index e442784,79df9ff..feb01e0 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@@ -1625,6 -1625,7 +1625,6 @@@ int pid_revalidate(struct dentry *dentr put_task_struct(task); return 1; } - d_drop(dentry); return 0; }
@@@ -1761,6 -1762,9 +1761,6 @@@ out put_task_struct(task);
out_notask: - if (status <= 0) - d_drop(dentry); - return status; }
@@@ -2445,7 -2449,7 +2445,7 @@@ static int proc_tgid_io_accounting(stru
#ifdef CONFIG_USER_NS static int proc_id_map_open(struct inode *inode, struct file *file, - struct seq_operations *seq_ops) + const struct seq_operations *seq_ops) { struct user_namespace *ns = NULL; struct task_struct *task; @@@ -2672,7 -2676,8 +2672,7 @@@ static void proc_flush_task_mnt(struct /* no ->d_hash() rejects on procfs */ dentry = d_hash_and_lookup(mnt->mnt_root, &name); if (dentry) { - shrink_dcache_parent(dentry); - d_drop(dentry); + d_invalidate(dentry); dput(dentry); }
@@@ -2692,7 -2697,8 +2692,7 @@@ name.len = snprintf(buf, sizeof(buf), "%d", pid); dentry = d_hash_and_lookup(dir, &name); if (dentry) { - shrink_dcache_parent(dentry); - d_drop(dentry); + d_invalidate(dentry); dput(dentry); }
diff --combined include/linux/kernel.h index a9e2268,44a498d..e989204 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@@ -470,6 -470,7 +470,7 @@@ extern enum system_states #define TAINT_FIRMWARE_WORKAROUND 11 #define TAINT_OOT_MODULE 12 #define TAINT_UNSIGNED_MODULE 13 + #define TAINT_SOFTLOCKUP 14
extern const char hex_asc[]; #define hex_asc_lo(x) hex_asc[((x) & 0x0f)] @@@ -493,15 -494,10 +494,10 @@@ static inline char *hex_byte_pack_upper return buf; }
extern int hex_to_bin(char ch); extern int __must_check hex2bin(u8 *dst, const char *src, size_t count);
-int mac_pton(const char *s, u8 *mac); +bool mac_pton(const char *s, u8 *mac);
/* * General tracing related utility functions - trace_printk(), @@@ -719,23 -715,8 +715,8 @@@ static inline void ftrace_dump(enum ftr (void) (&_max1 == &_max2); \ _max1 > _max2 ? _max1 : _max2; })
- #define min3(x, y, z) ({ \ - typeof(x) _min1 = (x); \ - typeof(y) _min2 = (y); \ - typeof(z) _min3 = (z); \ - (void) (&_min1 == &_min2); \ - (void) (&_min1 == &_min3); \ - _min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \ - (_min2 < _min3 ? _min2 : _min3); }) - - #define max3(x, y, z) ({ \ - typeof(x) _max1 = (x); \ - typeof(y) _max2 = (y); \ - typeof(z) _max3 = (z); \ - (void) (&_max1 == &_max2); \ - (void) (&_max1 == &_max3); \ - _max1 > _max2 ? (_max1 > _max3 ? _max1 : _max3) : \ - (_max2 > _max3 ? _max2 : _max3); }) + #define min3(x, y, z) min((typeof(x))min(x, y), z) + #define max3(x, y, z) max((typeof(x))max(x, y), z)
/** * min_not_zero - return the minimum that is _not_ zero, unless both are zero @@@ -750,20 -731,13 +731,13 @@@ /** * clamp - return a value clamped to a given range with strict typechecking * @val: current value - * @min: minimum allowable value - * @max: maximum allowable value + * @lo: lowest allowable value + * @hi: highest allowable value * * This macro does strict typechecking of min/max to make sure they are of the * same type as val. See the unnecessary pointer comparisons. */ - #define clamp(val, min, max) ({ \ - typeof(val) __val = (val); \ - typeof(min) __min = (min); \ - typeof(max) __max = (max); \ - (void) (&__val == &__min); \ - (void) (&__val == &__max); \ - __val = __val < __min ? __min: __val; \ - __val > __max ? __max: __val; }) + #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
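The rewritten macros compose the two-argument forms instead of open-coding three-way comparisons: min3(x, y, z) is min(min(x, y), z) and clamp(val, lo, hi) is min(max(val, lo), hi), with the typeof cast keeping the intermediate result in the first argument's type. A standalone demo of the composed forms, using simplified GNU C min()/max() helpers in place of the kernel's (which additionally compare pointer types to enforce matching argument types):

#include <stdio.h>

/* Simplified GNU C min()/max(); the kernel versions also do the
 * unnecessary-pointer-comparison type check. */
#define min(x, y) ({ typeof(x) _a = (x); typeof(y) _b = (y); \
		     _a < _b ? _a : _b; })
#define max(x, y) ({ typeof(x) _c = (x); typeof(y) _d = (y); \
		     _c > _d ? _c : _d; })

/* The rewritten three-way forms, as in the hunks above. */
#define min3(x, y, z) min((typeof(x))min(x, y), z)
#define max3(x, y, z) max((typeof(x))max(x, y), z)
#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)

int main(void)
{
	printf("min3(7, 3, 5)    = %d\n", min3(7, 3, 5));	/* 3 */
	printf("max3(7, 3, 5)    = %d\n", max3(7, 3, 5));	/* 7 */
	printf("clamp(12, 0, 10) = %d\n", clamp(12, 0, 10));	/* 10 */
	printf("clamp(-4, 0, 10) = %d\n", clamp(-4, 0, 10));	/* 0 */
	return 0;
}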
/* * ..and if you can't take the strict diff --combined include/linux/sched.h index b39a671,1d169c8..ec89295 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@@ -33,6 -33,7 +33,7 @@@ struct sched_param
#include <linux/smp.h> #include <linux/sem.h> + #include <linux/shm.h> #include <linux/signal.h> #include <linux/compiler.h> #include <linux/completion.h> @@@ -872,21 -873,21 +873,21 @@@ enum cpu_idle_type #define SD_NUMA 0x4000 /* cross-node balancing */
#ifdef CONFIG_SCHED_SMT -static inline const int cpu_smt_flags(void) +static inline int cpu_smt_flags(void) { return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; } #endif
#ifdef CONFIG_SCHED_MC -static inline const int cpu_core_flags(void) +static inline int cpu_core_flags(void) { return SD_SHARE_PKG_RESOURCES; } #endif
#ifdef CONFIG_NUMA -static inline const int cpu_numa_flags(void) +static inline int cpu_numa_flags(void) { return SD_NUMA; } @@@ -999,7 -1000,7 +1000,7 @@@ void free_sched_domains(cpumask_var_t d bool cpus_share_cache(int this_cpu, int that_cpu);
typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); -typedef const int (*sched_domain_flags_f)(void); +typedef int (*sched_domain_flags_f)(void);
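The sched.h hunks drop a const qualifier from function return types: const on a scalar return value is ignored by the language (gcc flags it under -Wignored-qualifiers, part of -Wextra), and it kept the flag callbacks from matching the plain sched_domain_flags_f typedef. A small standalone demonstration:

/* const on a scalar return type is ignored by the language; gcc warns
 * about the first form with -Wignored-qualifiers. */
static const int flags_const(void) { return 42; }
static int flags_plain(void) { return 42; }

typedef int (*flags_fn)(void);	/* the fixed typedef shape */

int main(void)
{
	flags_fn fn = flags_plain;	/* matches the typedef exactly */

	return fn() - flags_const();	/* 0 */
}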
#define SDTL_OVERLAP 0x01
@@@ -1270,6 -1271,9 +1271,6 @@@ struct task_struct #ifdef CONFIG_TREE_PREEMPT_RCU struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -#ifdef CONFIG_RCU_BOOST - struct rt_mutex *rcu_boost_mutex; -#endif /* #ifdef CONFIG_RCU_BOOST */
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) struct sched_info sched_info; @@@ -1386,6 -1390,7 +1387,7 @@@ #ifdef CONFIG_SYSVIPC /* ipc stuff */ struct sysv_sem sysvsem; + struct sysv_shm sysvshm; #endif #ifdef CONFIG_DETECT_HUNG_TASK /* hung task detection */ @@@ -2006,6 -2011,9 +2008,6 @@@ static inline void rcu_copy_process(str #ifdef CONFIG_TREE_PREEMPT_RCU p->rcu_blocked_node = NULL; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -#ifdef CONFIG_RCU_BOOST - p->rcu_boost_mutex = NULL; -#endif /* #ifdef CONFIG_RCU_BOOST */ INIT_LIST_HEAD(&p->rcu_node_entry); }
diff --combined include/scsi/scsi.h index 91e2e42,d34cf2d..4b69139 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@@ -31,7 -31,7 +31,7 @@@ enum scsi_timeouts * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit * is totally arbitrary, a setting of 2048 will get you at least 8mb ios. */ - #ifdef ARCH_HAS_SG_CHAIN + #ifdef CONFIG_ARCH_HAS_SG_CHAIN #define SCSI_MAX_SG_CHAIN_SEGMENTS 2048 #else #define SCSI_MAX_SG_CHAIN_SEGMENTS SCSI_MAX_SG_SEGMENTS @@@ -385,7 -385,7 +385,7 @@@ struct scsi_lun #define SCSI_W_LUN_ACCESS_CONTROL (SCSI_W_LUN_BASE + 2) #define SCSI_W_LUN_TARGET_LOG_PAGE (SCSI_W_LUN_BASE + 3)
-static inline int scsi_is_wlun(unsigned int lun) +static inline int scsi_is_wlun(u64 lun) { return (lun & 0xff00) == SCSI_W_LUN_BASE; } diff --combined init/Kconfig index 85fb985,b66f859..1dfdd81 --- a/init/Kconfig +++ b/init/Kconfig @@@ -505,7 -505,7 +505,7 @@@ config PREEMPT_RC def_bool TREE_PREEMPT_RCU help This option enables preemptible-RCU code that is common between - the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. + TREE_PREEMPT_RCU and, in the old days, TINY_PREEMPT_RCU.
config RCU_STALL_COMMON def_bool ( TREE_RCU || TREE_PREEMPT_RCU || RCU_TRACE ) @@@ -737,7 -737,7 +737,7 @@@ choic
config RCU_NOCB_CPU_NONE bool "No build_forced no-CBs CPUs" - depends on RCU_NOCB_CPU && !NO_HZ_FULL + depends on RCU_NOCB_CPU && !NO_HZ_FULL_ALL help This option does not force any of the CPUs to be no-CBs CPUs. Only CPUs designated by the rcu_nocbs= boot parameter will be @@@ -751,7 -751,7 +751,7 @@@
config RCU_NOCB_CPU_ZERO bool "CPU 0 is a build_forced no-CBs CPU" - depends on RCU_NOCB_CPU && !NO_HZ_FULL + depends on RCU_NOCB_CPU && !NO_HZ_FULL_ALL help This option forces CPU 0 to be a no-CBs CPU, so that its RCU callbacks are invoked by a per-CPU kthread whose name begins @@@ -783,8 -783,13 +783,13 @@@ endchoic
endmenu # "RCU Subsystem"
+ config BUILD_BIN2C + bool + default n + config IKCONFIG tristate "Kernel .config support" + select BUILD_BIN2C ---help--- This option enables the complete Linux kernel ".config" file contents to be saved in the kernel. It provides documentation @@@ -807,15 -812,53 +812,53 @@@ config LOG_BUF_SHIF range 12 21 default 17 help - Select kernel log buffer size as a power of 2. + Select the minimal kernel log buffer size as a power of 2. + The final size is affected by LOG_CPU_MAX_BUF_SHIFT config + parameter, see below. Any higher size also might be forced + by "log_buf_len" boot parameter. + Examples: - 17 => 128 KB + 17 => 128 KB 16 => 64 KB - 15 => 32 KB - 14 => 16 KB + 15 => 32 KB + 14 => 16 KB 13 => 8 KB 12 => 4 KB
+ config LOG_CPU_MAX_BUF_SHIFT + int "CPU kernel log buffer size contribution (13 => 8 KB, 17 => 128KB)" + range 0 21 + default 12 if !BASE_SMALL + default 0 if BASE_SMALL + help + This option allows increasing the default ring buffer size + according to the number of CPUs. The value defines the contribution + of each CPU as a power of 2. The used space is typically only a few + lines; however, it might be much more when problems are reported, + e.g. backtraces. + + The increased size means that a new buffer has to be allocated and + the original static one is unused. It makes sense only on systems + with more CPUs. Therefore this value is used only when the sum of + contributions is greater than half of the default kernel ring + buffer as defined by LOG_BUF_SHIFT. The default values are set + so that more than 64 CPUs are needed to trigger the allocation. + + This option is also ignored when the "log_buf_len" kernel parameter + is used, as it forces an exact (power of two) size of the ring buffer. + + The number of possible CPUs is used for this computation, ignoring + hotplugging, making the computation optimal for the worst-case + scenario while allowing a simple algorithm to be used from bootup. + + Example shift values and their meaning: + 17 => 128 KB for each CPU + 16 => 64 KB for each CPU + 15 => 32 KB for each CPU + 14 => 16 KB for each CPU + 13 => 8 KB for each CPU + 12 => 4 KB for each CPU + # # Architectures with an unreliable sched_clock() should select this: # @@@ -1264,77 -1307,6 +1307,77 @@@ config CC_OPTIMIZE_FOR_SIZ
If unsure, say N.
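Returning to the LOG_CPU_MAX_BUF_SHIFT entry above: the help text describes a simple sizing rule, where each possible CPU contributes 2^shift bytes and a bigger buffer replaces the static LOG_BUF_SHIFT one only when that sum exceeds half the default size ("log_buf_len" on the command line overrides the whole computation). A sketch of the rule as the help text states it, not the exact kernel code:

#include <stdio.h>

/* Sizing rule as stated in the LOG_CPU_MAX_BUF_SHIFT help text. */
static long ring_buffer_len(long possible_cpus, int log_buf_shift,
			    int cpu_max_buf_shift)
{
	long def_len = 1L << log_buf_shift;
	long cpu_extra = possible_cpus * (1L << cpu_max_buf_shift);

	/* Allocate a bigger buffer only when CPUs contribute enough. */
	if (cpu_extra > def_len / 2)
		return def_len + cpu_extra;
	return def_len;		/* keep the static buffer */
}

int main(void)
{
	/* Kconfig defaults: LOG_BUF_SHIFT=17 (128 KB), 4 KB per CPU. */
	printf("8 CPUs:   %ld bytes\n", ring_buffer_len(8, 17, 12));
	printf("128 CPUs: %ld bytes\n", ring_buffer_len(128, 17, 12));
	return 0;
}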
+config LTO_MENU + bool "Enable gcc link time optimization (LTO)" + # Only tested on X86 for now. For other architectures you likely + # have to fix some things first, like adding asmlinkages etc. + depends on X86 + # lto does not support excluding flags for specific files + # right now. Can be removed if that is fixed. + depends on !FUNCTION_TRACER + help + With this option gcc will do whole program optimizations for + the whole kernel and all modules. This increases compile time, but can + lead to better code. It allows gcc to inline functions between + different files and do other optimizations. It might also trigger + bugs due to more aggressive optimization. It allows gcc to drop unused + code. On smaller monolithic kernel configurations + it usually leads to smaller kernels, especially when modules + are disabled. + + With this option gcc will also do some global checking over + different source files. It also disables a number of kernel + features. + + This option is recommended for release builds. With LTO + the kernel always has to be re-optimized (but not re-parsed) + on each build. + + This requires a gcc 4.8 or later compiler and + Linux binutils 2.21.51.0.3 or later. gcc 4.9 builds significantly + faster than 4.8. It does not currently work with an FSF release of + binutils or with the gold linker. + + On larger configurations this may need more than 4GB of RAM. + It will likely not work on those with a 32-bit compiler. + + When the toolchain support is not available this will (hopefully) + be automatically disabled. + + For more information see Documentation/lto-build. + +config LTO_DISABLE + bool "Disable LTO again" + depends on LTO_MENU + default n + help + This option is merely here so that allyesconfig or allmodconfig do + not enable LTO. If you actually want to use LTO, do not enable this. + +config LTO + bool + default y + depends on LTO_MENU && !LTO_DISABLE + +config LTO_DEBUG + bool "Enable LTO compile time debugging" + depends on LTO + help + Enable LTO debugging in the compiler. The compiler dumps + some log files that make it easier to figure out LTO + behavior. The log files also allow reconstructing + the global inlining and a global call graph. + However, they add some (single-threaded) cost to the + compilation. When in doubt, do not enable. + +config LTO_CP_CLONE + bool "Allow aggressive cloning for function specialization" + depends on LTO + help + Allow the compiler to clone and specialize functions for specific + arguments when it determines the functions are very commonly + called with these arguments. Experimental. This will increase text size. + config SYSCTL bool
@@@ -1834,8 -1806,6 +1877,8 @@@ config MODULE_FORCE_UNLOA
config MODVERSIONS bool "Module versioning support" + # LTO should work with gcc 4.9 + depends on !LTO help Usually, you have to use modules compiled with your kernel. Saying Y here makes it sometimes possible to use modules diff --combined kernel/Makefile index 973a40c,9b07bb7..de62ac0 --- a/kernel/Makefile +++ b/kernel/Makefile @@@ -3,11 -3,12 +3,11 @@@ #
obj-y = fork.o exec_domain.o panic.o \ - cpu.o exit.o itimer.o time.o softirq.o resource.o \ - sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ + cpu.o exit.o softirq.o resource.o \ + sysctl.o sysctl_binary.o capability.o ptrace.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ - extable.o params.o posix-timers.o \ - kthread.o sys_ni.o posix-cpu-timers.o \ - hrtimer.o nsproxy.o \ + extable.o params.o \ + kthread.o sys_ni.o nsproxy.o \ notifier.o ksysfs.o cred.o reboot.o \ async.o range.o groups.o smpboot.o
@@@ -104,11 -105,27 +104,11 @@@ targets += config_data.g $(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE $(call if_changed,gzip)
- filechk_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;") + filechk_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/basic/bin2c; echo "MAGIC_END;") targets += config_data.h $(obj)/config_data.h: $(obj)/config_data.gz FORCE $(call filechk,ikconfiggz)
-$(obj)/time.o: $(obj)/timeconst.h - -quiet_cmd_hzfile = HZFILE $@ - cmd_hzfile = echo "hz=$(CONFIG_HZ)" > $@ - -targets += hz.bc -$(obj)/hz.bc: $(objtree)/include/config/hz.h FORCE - $(call if_changed,hzfile) - -quiet_cmd_bc = BC $@ - cmd_bc = bc -q $(filter-out FORCE,$^) > $@ - -targets += timeconst.h -$(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE - $(call if_changed,bc) - ############################################################################### # # Roll all the X.509 certificates that we can find together and pull them into diff --combined kernel/events/uprobes.c index 6f3254e,46b7c31..1d0af8a --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@@ -167,6 -167,11 +167,11 @@@ static int __replace_page(struct vm_are /* For mmu_notifiers */ const unsigned long mmun_start = addr; const unsigned long mmun_end = addr + PAGE_SIZE; + struct mem_cgroup *memcg; + + err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg); + if (err) + return err;
/* For try_to_free_swap() and munlock_vma_page() below */ lock_page(page); @@@ -179,6 -184,8 +184,8 @@@
get_page(kpage); page_add_new_anon_rmap(kpage, vma, addr); + mem_cgroup_commit_charge(kpage, memcg, false); + lru_cache_add_active_or_unevictable(kpage, vma);
if (!PageAnon(page)) { dec_mm_counter(mm, MM_FILEPAGES); @@@ -200,6 -207,7 +207,7 @@@
err = 0; unlock: + mem_cgroup_cancel_charge(kpage, memcg); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); unlock_page(page); return err; @@@ -315,18 -323,11 +323,11 @@@ retry if (!new_page) goto put_old;
- if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) - goto put_new; - __SetPageUptodate(new_page); copy_highpage(new_page, old_page); copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
ret = __replace_page(vma, vaddr, old_page, new_page); - if (ret) - mem_cgroup_uncharge_page(new_page); - - put_new: page_cache_release(new_page); put_old: put_page(old_page); @@@ -846,7 -847,7 +847,7 @@@ static void __uprobe_unregister(struct { int err;
- if (!consumer_del(uprobe, uc)) /* WARN? */ + if (WARN_ON(!consumer_del(uprobe, uc))) return;
err = register_for_each_vma(uprobe, NULL); @@@ -927,7 -928,7 +928,7 @@@ int uprobe_apply(struct inode *inode, l int ret = -ENOENT;
uprobe = find_uprobe(inode, offset); - if (!uprobe) + if (WARN_ON(!uprobe)) return ret;
down_write(&uprobe->register_rwsem); @@@ -952,7 -953,7 +953,7 @@@ void uprobe_unregister(struct inode *in struct uprobe *uprobe;
uprobe = find_uprobe(inode, offset); - if (!uprobe) + if (WARN_ON(!uprobe)) return;
down_write(&uprobe->register_rwsem); diff --combined kernel/fork.c index 8f54193,dd8864f..41c9890 --- a/kernel/fork.c +++ b/kernel/fork.c @@@ -365,12 -365,11 +365,11 @@@ static int dup_mmap(struct mm_struct *m */ down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
- mm->locked_vm = 0; - mm->mmap = NULL; - mm->vmacache_seqnum = 0; - mm->map_count = 0; - cpumask_clear(mm_cpumask(mm)); - mm->mm_rb = RB_ROOT; + mm->total_vm = oldmm->total_vm; + mm->shared_vm = oldmm->shared_vm; + mm->exec_vm = oldmm->exec_vm; + mm->stack_vm = oldmm->stack_vm; + rb_link = &mm->mm_rb.rb_node; rb_parent = NULL; pprev = &mm->mmap; @@@ -529,17 -528,28 +528,28 @@@ static void mm_init_aio(struct mm_struc
static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) { + mm->mmap = NULL; + mm->mm_rb = RB_ROOT; + mm->vmacache_seqnum = 0; atomic_set(&mm->mm_users, 1); atomic_set(&mm->mm_count, 1); init_rwsem(&mm->mmap_sem); INIT_LIST_HEAD(&mm->mmlist); mm->core_state = NULL; atomic_long_set(&mm->nr_ptes, 0); + mm->map_count = 0; + mm->locked_vm = 0; + mm->pinned_vm = 0; memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); spin_lock_init(&mm->page_table_lock); + mm_init_cpumask(mm); mm_init_aio(mm); mm_init_owner(mm, p); + mmu_notifier_mm_init(mm); clear_tlb_flush_pending(mm); + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS + mm->pmd_huge_pte = NULL; + #endif
if (current->mm) { mm->flags = current->mm->flags & MMF_INIT_MASK; @@@ -549,11 -559,17 +559,17 @@@ mm->def_flags = 0; }
- if (likely(!mm_alloc_pgd(mm))) { - mmu_notifier_mm_init(mm); - return mm; - } + if (mm_alloc_pgd(mm)) + goto fail_nopgd; + + if (init_new_context(p, mm)) + goto fail_nocontext;
+ return mm; + + fail_nocontext: + mm_free_pgd(mm); + fail_nopgd: free_mm(mm); return NULL; } @@@ -587,7 -603,6 +603,6 @@@ struct mm_struct *mm_alloc(void return NULL;
memset(mm, 0, sizeof(*mm)); - mm_init_cpumask(mm); return mm_init(mm, current); }
@@@ -819,17 -834,10 +834,10 @@@ static struct mm_struct *dup_mm(struct goto fail_nomem;
memcpy(mm, oldmm, sizeof(*mm)); - mm_init_cpumask(mm);
if (!mm_init(mm, tsk)) goto fail_nomem;
- if (init_new_context(tsk, mm)) - goto fail_nocontext; - dup_mm_exe_file(oldmm, mm);
err = dup_mmap(mm, oldmm); @@@ -851,15 -859,6 +859,6 @@@ free_pt
fail_nomem: return NULL; - - fail_nocontext: - /* - * If init_new_context() failed, we cannot use mmput() to free the mm - * because it calls destroy_context() - */ - mm_free_pgd(mm); - free_mm(mm); - return NULL; }
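The mm_init() rework above is the canonical goto-unwind shape for initialization: each step that can fail jumps to a label that tears down exactly the steps that already succeeded, in reverse order, which is what lets dup_mm() drop its special fail_nocontext path here. A standalone sketch of the pattern (the two resources are illustrative):

#include <stdio.h>
#include <stdlib.h>

struct ctx { void *pgd; void *aux; };

static struct ctx *ctx_init(struct ctx *c)
{
	c->pgd = malloc(64);
	if (!c->pgd)
		goto fail_nopgd;

	c->aux = malloc(64);
	if (!c->aux)
		goto fail_noaux;

	return c;		/* fully initialized */

fail_noaux:			/* unwind in reverse order of setup */
	free(c->pgd);
fail_nopgd:
	return NULL;
}

int main(void)
{
	struct ctx c;

	if (!ctx_init(&c)) {
		printf("init failed\n");
		return 1;
	}
	printf("init ok\n");
	free(c.aux);
	free(c.pgd);
	return 0;
}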
static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) @@@ -1262,7 -1261,7 +1261,7 @@@ static struct task_struct *copy_process
posix_cpu_timers_init(p);
- do_posix_clock_monotonic_gettime(&p->start_time); + ktime_get_ts(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); p->io_context = NULL; @@@ -1328,6 -1327,7 +1327,7 @@@ if (retval) goto bad_fork_cleanup_policy; /* copy all the process information */ + shm_init_task(p); retval = copy_semundo(clone_flags, p); if (retval) goto bad_fork_cleanup_audit; @@@ -1873,6 -1873,11 +1873,11 @@@ SYSCALL_DEFINE1(unshare, unsigned long */ exit_sem(current); } + if (unshare_flags & CLONE_NEWIPC) { + /* Orphan segments in old ns (see sem above). */ + exit_shm(current); + shm_init_task(current); + }
if (new_nsproxy) switch_task_namespaces(current, new_nsproxy); diff --combined kernel/time/posix-timers.c index 424c2d4,86535c0..86535c0 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@@ -71,7 -71,7 +71,7 @@@ static DEFINE_SPINLOCK(hash_lock) * SIGEV values. Here we put out an error if this assumption fails. */ #if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \ - ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD)) + ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD)) #error "SIGEV_THREAD_ID must not share bit with other SIGEV values!" #endif
@@@ -252,7 -252,8 +252,8 @@@ static int posix_get_monotonic_coarse(c return 0; }
- static int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp) + static int posix_get_coarse_res(const clockid_t which_clock, + struct timespec *tp) { *tp = ktime_to_timespec(KTIME_LOW_RES); return 0; @@@ -333,14 -334,16 +334,16 @@@ static __init int init_posix_timers(voi posix_timers_register_clock(CLOCK_REALTIME, &clock_realtime); posix_timers_register_clock(CLOCK_MONOTONIC, &clock_monotonic); posix_timers_register_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw); - posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse); - posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse); + posix_timers_register_clock(CLOCK_REALTIME_COARSE, + &clock_realtime_coarse); + posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, + &clock_monotonic_coarse); posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime); posix_timers_register_clock(CLOCK_TAI, &clock_tai);
posix_timers_cache = kmem_cache_create("posix_timers_cache", - sizeof (struct k_itimer), 0, SLAB_PANIC, - NULL); + sizeof(struct k_itimer), 0, + SLAB_PANIC, NULL); return 0; }
@@@ -494,11 -497,11 +497,11 @@@ static enum hrtimer_restart posix_timer return ret; }
- static struct pid *good_sigevent(sigevent_t * event) + static struct pid *good_sigevent(sigevent_t *event) { struct task_struct *rtn = current->group_leader;
- if ((event->sigev_notify & SIGEV_THREAD_ID ) && + if ((event->sigev_notify & SIGEV_THREAD_ID) && (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || !same_thread_group(rtn, current) || (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) @@@ -515,18 -518,18 +518,18 @@@ void posix_timers_register_clock(const struct k_clock *new_clock) { if ((unsigned) clock_id >= MAX_CLOCKS) { - printk(KERN_WARNING "POSIX clock register failed for clock_id %d\n", + pr_warn("POSIX clock register failed for clock_id %d\n", clock_id); return; }
if (!new_clock->clock_get) { - printk(KERN_WARNING "POSIX clock id %d lacks clock_get()\n", + pr_warn("POSIX clock id %d lacks clock_get()\n", clock_id); return; } if (!new_clock->clock_getres) { - printk(KERN_WARNING "POSIX clock id %d lacks clock_getres()\n", + pr_warn("POSIX clock id %d lacks clock_getres()\n", clock_id); return; } @@@ -535,7 -538,7 +538,7 @@@ } EXPORT_SYMBOL_GPL(posix_timers_register_clock);
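The conversions above replace printk(KERN_WARNING ...) with pr_warn(), which in the kernel is a thin wrapper that prepends the log level and a per-file pr_fmt() prefix to the format string. A userspace analog of the macro shape, with fprintf standing in for printk and an assumed "posix-timers: " prefix:

#include <stdio.h>

/* Per-file prefix; in kernel code pr_fmt() is defined before any #include. */
#define pr_fmt(fmt) "posix-timers: " fmt

/* Userspace stand-in for the kernel's pr_warn() wrapper. */
#define pr_warn(fmt, ...) \
	fprintf(stderr, "warning: " pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
	pr_warn("POSIX clock register failed for clock_id %d\n", 14);
	return 0;
}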
- static struct k_itimer * alloc_posix_timer(void) + static struct k_itimer *alloc_posix_timer(void) { struct k_itimer *tmr; tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL); @@@ -622,7 -625,7 +625,7 @@@ SYSCALL_DEFINE3(timer_create, const clo new_timer->it_overrun = -1;
if (timer_event_spec) { - if (copy_from_user(&event, timer_event_spec, sizeof (event))) { + if (copy_from_user(&event, timer_event_spec, sizeof(event))) { error = -EFAULT; goto out; } @@@ -647,7 -650,7 +650,7 @@@ new_timer->sigq->info.si_code = SI_TIMER;
if (copy_to_user(created_timer_id, - &new_timer_id, sizeof (new_timer_id))) { + &new_timer_id, sizeof(new_timer_id))) { error = -EFAULT; goto out; } @@@ -748,7 -751,8 +751,8 @@@ common_timer_get(struct k_itimer *timr */ if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING || (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) - timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); + timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, + iv);
remaining = ktime_sub(hrtimer_get_expires(timer), now); /* Return 0 only, when the timer is expired and not pending */ @@@ -785,7 -789,7 +789,7 @@@ SYSCALL_DEFINE2(timer_gettime, timer_t
unlock_timer(timr, flags);
- if (!ret && copy_to_user(setting, &cur_setting, sizeof (cur_setting))) + if (!ret && copy_to_user(setting, &cur_setting, sizeof(cur_setting))) return -EFAULT;
return ret; @@@ -837,7 -841,7 +841,7 @@@ common_timer_set(struct k_itimer *timr if (hrtimer_try_to_cancel(timer) < 0) return TIMER_RETRY;
- timr->it_requeue_pending = (timr->it_requeue_pending + 2) & + timr->it_requeue_pending = (timr->it_requeue_pending + 2) & ~REQUEUE_PENDING; timr->it_overrun_last = 0;
@@@ -857,9 -861,8 +861,8 @@@ /* SIGEV_NONE timers are not queued ! See common_timer_get */ if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { /* Setup correct expiry time for relative timers */ - if (mode == HRTIMER_MODE_REL) { + if (mode == HRTIMER_MODE_REL) hrtimer_add_expires(timer, timer->base->get_time()); - } return 0; }
@@@ -882,7 -885,7 +885,7 @@@ SYSCALL_DEFINE4(timer_settime, timer_t if (!new_setting) return -EINVAL;
- if (copy_from_user(&new_spec, new_setting, sizeof (new_spec))) + if (copy_from_user(&new_spec, new_setting, sizeof(new_spec))) return -EFAULT;
if (!timespec_valid(&new_spec.it_interval) || @@@ -901,12 -904,12 +904,12 @@@ retry
unlock_timer(timr, flag); if (error == TIMER_RETRY) { - rtn = NULL; // We already got the old time... + rtn = NULL; /* We already got the old time... */ goto retry; }
if (old_setting && !error && - copy_to_user(old_setting, &old_spec, sizeof (old_spec))) + copy_to_user(old_setting, &old_spec, sizeof(old_spec))) error = -EFAULT;
return error; @@@ -1008,14 -1011,14 +1011,14 @@@ SYSCALL_DEFINE2(clock_settime, const cl if (!kc || !kc->clock_set) return -EINVAL;
- if (copy_from_user(&new_tp, tp, sizeof (*tp))) + if (copy_from_user(&new_tp, tp, sizeof(*tp))) return -EFAULT;
return kc->clock_set(which_clock, &new_tp); }
SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, - struct timespec __user *,tp) + struct timespec __user *, tp) { struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec kernel_tp; @@@ -1026,7 -1029,7 +1029,7 @@@
error = kc->clock_get(which_clock, &kernel_tp);
- if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp))) + if (!error && copy_to_user(tp, &kernel_tp, sizeof(kernel_tp))) error = -EFAULT;
return error; @@@ -1067,7 -1070,7 +1070,7 @@@ SYSCALL_DEFINE2(clock_getres, const clo
error = kc->clock_getres(which_clock, &rtn_tp);
- if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp))) + if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof(rtn_tp))) error = -EFAULT;
return error; @@@ -1096,7 -1099,7 +1099,7 @@@ SYSCALL_DEFINE4(clock_nanosleep, const if (!kc->nsleep) return -ENANOSLEEP_NOTSUP;
- if (copy_from_user(&t, rqtp, sizeof (struct timespec))) + if (copy_from_user(&t, rqtp, sizeof(struct timespec))) return -EFAULT;
if (!timespec_valid(&t)) diff --combined mm/memcontrol.c index db536e9,6c3ffb0..df394e0 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@@ -882,13 -882,6 +882,6 @@@ static long mem_cgroup_read_stat(struc return val; }
- static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, - bool charge) - { - int val = (charge) ? 1 : -1; - this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val); - } - static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg, enum mem_cgroup_events_index idx) { @@@ -909,13 -902,15 +902,15 @@@
static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, struct page *page, - bool anon, int nr_pages) + int nr_pages) { + preempt_disable(); + /* * Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is * counted as CACHE even if it's on ANON LRU. */ - if (anon) + if (PageAnon(page)) __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_pages); else @@@ -935,6 -930,7 +930,7 @@@ }
__this_cpu_add(memcg->stat->nr_page_events, nr_pages); + preempt_enable(); }
unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) @@@ -1347,20 -1343,6 +1343,6 @@@ out return lruvec; }
- /* - * Following LRU functions are allowed to be used without PCG_LOCK. - * Operations are called by routine of global LRU independently from memcg. - * What we have to take care of here is validness of pc->mem_cgroup. - * - * Changes to pc->mem_cgroup happens when - * 1. charge - * 2. moving account - * In typical case, "charge" is done before add-to-lru. Exception is SwapCache. - * It is added to LRU before charge. - * If PCG_USED bit is not set, page_cgroup is not added to this private LRU. - * When moving account, the page is not on LRU. It's isolated. - */ - /** * mem_cgroup_page_lruvec - return lruvec for adding an lru page * @page: the page @@@ -2261,22 -2243,14 +2243,14 @@@ cleanup * * Notes: Race condition * - * We usually use lock_page_cgroup() for accessing page_cgroup member but - * it tends to be costly. But considering some conditions, we doesn't need - * to do so _always_. - * - * Considering "charge", lock_page_cgroup() is not required because all - * file-stat operations happen after a page is attached to radix-tree. There - * are no race with "charge". + * Charging occurs during page instantiation, while the page is + * unmapped and locked in page migration, or while the page table is + * locked in THP migration. No race is possible. * - * Considering "uncharge", we know that memcg doesn't clear pc->mem_cgroup - * at "uncharge" intentionally. So, we always see valid pc->mem_cgroup even - * if there are race with "uncharge". Statistics itself is properly handled - * by flags. + * Uncharge happens to pages with zero references, no race possible. * - * Considering "move", this is an only case we see a race. To make the race - * small, we check memcg->moving_account and detect there are possibility - * of race or not. If there is, we take a lock. + * Charge moving between groups is protected by checking mm->moving + * account and taking the move_lock in the slowpath. */
void __mem_cgroup_begin_update_page_stat(struct page *page, @@@ -2551,55 -2525,63 +2525,63 @@@ static int memcg_cpu_hotplug_callback(s return NOTIFY_OK; }
- - /* See mem_cgroup_try_charge() for details */ - enum { - CHARGE_OK, /* success */ - CHARGE_RETRY, /* need to retry but retry is not bad */ - CHARGE_NOMEM, /* we can't do more. return -ENOMEM */ - CHARGE_WOULDBLOCK, /* GFP_WAIT wasn't set and no enough res. */ - }; - - static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, - unsigned int nr_pages, unsigned int min_pages, - bool invoke_oom) + static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, + unsigned int nr_pages) { - unsigned long csize = nr_pages * PAGE_SIZE; + unsigned int batch = max(CHARGE_BATCH, nr_pages); + int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; struct mem_cgroup *mem_over_limit; struct res_counter *fail_res; + unsigned long nr_reclaimed; unsigned long flags = 0; - int ret; + unsigned long long size; + int ret = 0;
- ret = res_counter_charge(&memcg->res, csize, &fail_res); + retry: + if (consume_stock(memcg, nr_pages)) + goto done;
- if (likely(!ret)) { + size = batch * PAGE_SIZE; + if (!res_counter_charge(&memcg->res, size, &fail_res)) { if (!do_swap_account) - return CHARGE_OK; - ret = res_counter_charge(&memcg->memsw, csize, &fail_res); - if (likely(!ret)) - return CHARGE_OK; - - res_counter_uncharge(&memcg->res, csize); + goto done_restock; + if (!res_counter_charge(&memcg->memsw, size, &fail_res)) + goto done_restock; + res_counter_uncharge(&memcg->res, size); mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw); flags |= MEM_CGROUP_RECLAIM_NOSWAP; } else mem_over_limit = mem_cgroup_from_res_counter(fail_res, res); + + if (batch > nr_pages) { + batch = nr_pages; + goto retry; + } + /* - * Never reclaim on behalf of optional batching, retry with a - * single page instead. + * Unlike in global OOM situations, memcg is not in a physical + * memory shortage. Allow dying and OOM-killed tasks to + * bypass the last charges so that they can exit quickly and + * free their memory. */ - if (nr_pages > min_pages) - return CHARGE_RETRY; + if (unlikely(test_thread_flag(TIF_MEMDIE) || + fatal_signal_pending(current) || + current->flags & PF_EXITING)) + goto bypass; + + if (unlikely(task_in_memcg_oom(current))) + goto nomem;
if (!(gfp_mask & __GFP_WAIT)) - return CHARGE_WOULDBLOCK; + goto nomem;
- if (gfp_mask & __GFP_NORETRY) - return CHARGE_NOMEM; + nr_reclaimed = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
- ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags); - if (mem_cgroup_margin(mem_over_limit) >= nr_pages) - return CHARGE_RETRY; + if (mem_cgroup_margin(mem_over_limit) >= batch) + goto retry; + + if (gfp_mask & __GFP_NORETRY) + goto nomem; /* * Even though the limit is exceeded at this point, reclaim * may have been able to free some pages. Retry the charge @@@ -2609,142 -2591,47 +2591,47 @@@ * unlikely to succeed so close to the limit, and we fall back * to regular pages anyway in case of failure. */ - if (nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER) && ret) - return CHARGE_RETRY; - + if (nr_reclaimed && batch <= (1 << PAGE_ALLOC_COSTLY_ORDER)) + goto retry; /* * At task move, charge accounts can be doubly counted. So, it's * better to wait until the end of task_move if something is going on. */ if (mem_cgroup_wait_acct_move(mem_over_limit)) - return CHARGE_RETRY; - - if (invoke_oom) - mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(csize)); - - return CHARGE_NOMEM; - } - - /** - * mem_cgroup_try_charge - try charging a memcg - * @memcg: memcg to charge - * @nr_pages: number of pages to charge - * @oom: trigger OOM if reclaim fails - * - * Returns 0 if @memcg was charged successfully, -EINTR if the charge - * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed. - */ - static int mem_cgroup_try_charge(struct mem_cgroup *memcg, - gfp_t gfp_mask, - unsigned int nr_pages, - bool oom) - { - unsigned int batch = max(CHARGE_BATCH, nr_pages); - int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; - int ret; - - if (mem_cgroup_is_root(memcg)) - goto done; - /* - * Unlike in global OOM situations, memcg is not in a physical - * memory shortage. Allow dying and OOM-killed tasks to - * bypass the last charges so that they can exit quickly and - * free their memory. - */ - if (unlikely(test_thread_flag(TIF_MEMDIE) || - fatal_signal_pending(current) || - current->flags & PF_EXITING)) - goto bypass; + goto retry;
- if (unlikely(task_in_memcg_oom(current))) - goto nomem; + if (nr_retries--) + goto retry;
if (gfp_mask & __GFP_NOFAIL) - oom = false; - again: - if (consume_stock(memcg, nr_pages)) - goto done; - - do { - bool invoke_oom = oom && !nr_oom_retries; - - /* If killed, bypass charge */ - if (fatal_signal_pending(current)) - goto bypass; + goto bypass;
- ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, - nr_pages, invoke_oom); - switch (ret) { - case CHARGE_OK: - break; - case CHARGE_RETRY: /* not in OOM situation but retry */ - batch = nr_pages; - goto again; - case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */ - goto nomem; - case CHARGE_NOMEM: /* OOM routine works */ - if (!oom || invoke_oom) - goto nomem; - nr_oom_retries--; - break; - } - } while (ret != CHARGE_OK); + if (fatal_signal_pending(current)) + goto bypass;
- if (batch > nr_pages) - refill_stock(memcg, batch - nr_pages); - done: - return 0; + mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(batch)); nomem: if (!(gfp_mask & __GFP_NOFAIL)) return -ENOMEM; bypass: - return -EINTR; - } - - /** - * mem_cgroup_try_charge_mm - try charging a mm - * @mm: mm_struct to charge - * @nr_pages: number of pages to charge - * @oom: trigger OOM if reclaim fails - * - * Returns the charged mem_cgroup associated with the given mm_struct or - * NULL the charge failed. - */ - static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm, - gfp_t gfp_mask, - unsigned int nr_pages, - bool oom) - - { - struct mem_cgroup *memcg; - int ret; - - memcg = get_mem_cgroup_from_mm(mm); - ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom); - css_put(&memcg->css); - if (ret == -EINTR) - memcg = root_mem_cgroup; - else if (ret) - memcg = NULL; + memcg = root_mem_cgroup; + ret = -EINTR; + goto retry;
- return memcg; + done_restock: + if (batch > nr_pages) + refill_stock(memcg, batch - nr_pages); + done: + return ret; }
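The rewritten try_charge() folds the old CHARGE_* state machine into straight-line gotos: consume per-CPU stock, charge the res_counter a batch at a time, reclaim and retry, and only then OOM or bypass. The stock it consumes and refills is a simple overcharge-and-cache scheme; modeled in user space (per-CPU becomes per-thread here, the batch size is illustrative, and the real stock is additionally keyed to a single memcg at a time):

    #include <stdbool.h>

    #define CHARGE_BATCH 32UL       /* pages charged ahead of actual need */

    static __thread unsigned long stock_nr_pages;   /* per-"CPU" cache */

    /* Fast path: satisfy a charge from pre-charged stock, no counters. */
    static bool consume_stock(unsigned long nr_pages)
    {
            if (stock_nr_pages >= nr_pages) {
                    stock_nr_pages -= nr_pages;
                    return true;
            }
            return false;
    }

    /* Park the surplus of a batched charge for later fast-path hits. */
    static void refill_stock(unsigned long nr_pages)
    {
            stock_nr_pages += nr_pages;
    }

So a successful batched charge pays one res_counter transaction and leaves batch - nr_pages in the stock, letting most single-page charges skip the shared counters entirely.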
- /* - * Somemtimes we have to undo a charge we got by try_charge(). - * This function is for that and do uncharge, put css's refcnt. - * gotten by try_charge(). - */ - static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg, - unsigned int nr_pages) + static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) { - if (!mem_cgroup_is_root(memcg)) { - unsigned long bytes = nr_pages * PAGE_SIZE; + unsigned long bytes = nr_pages * PAGE_SIZE;
- res_counter_uncharge(&memcg->res, bytes); - if (do_swap_account) - res_counter_uncharge(&memcg->memsw, bytes); - } + res_counter_uncharge(&memcg->res, bytes); + if (do_swap_account) + res_counter_uncharge(&memcg->memsw, bytes); }
/* @@@ -2756,9 -2643,6 +2643,6 @@@ static void __mem_cgroup_cancel_local_c { unsigned long bytes = nr_pages * PAGE_SIZE;
- if (mem_cgroup_is_root(memcg)) - return; - res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes); if (do_swap_account) res_counter_uncharge_until(&memcg->memsw, @@@ -2779,6 -2663,16 +2663,16 @@@ static struct mem_cgroup *mem_cgroup_lo return mem_cgroup_from_id(id); }
+ /* + * try_get_mem_cgroup_from_page - look up page's memcg association + * @page: the page + * + * Look up, get a css reference, and return the memcg that owns @page. + * + * The page must be locked to prevent racing with swap-in and page + * cache charges. If coming from an unlocked page table, the caller + * must ensure the page is on the LRU or this can race with charging. + */ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) { struct mem_cgroup *memcg = NULL; @@@ -2789,7 -2683,6 +2683,6 @@@ VM_BUG_ON_PAGE(!PageLocked(page), page);
pc = lookup_page_cgroup(page); - lock_page_cgroup(pc); if (PageCgroupUsed(pc)) { memcg = pc->mem_cgroup; if (memcg && !css_tryget_online(&memcg->css)) @@@ -2803,23 -2696,17 +2696,17 @@@ memcg = NULL; rcu_read_unlock(); } return memcg; }
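With lock_page_cgroup() gone, the new kernel-doc above carries the whole serialization story. A caller that cannot rely on a locked page table would follow roughly this shape (sketch of a hypothetical call site, not code from this patch):

    struct mem_cgroup *memcg;

    lock_page(page);        /* serializes against swap-in and cache charges */
    memcg = try_get_mem_cgroup_from_page(page);
    unlock_page(page);
    if (memcg) {
            /* ... inspect or account against memcg ... */
            css_put(&memcg->css);   /* drop the reference the lookup took */
    }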
- static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, - struct page *page, - unsigned int nr_pages, - enum charge_type ctype, - bool lrucare) + static void commit_charge(struct page *page, struct mem_cgroup *memcg, + unsigned int nr_pages, bool lrucare) { struct page_cgroup *pc = lookup_page_cgroup(page); struct zone *uninitialized_var(zone); struct lruvec *lruvec; bool was_on_lru = false; - bool anon;
- lock_page_cgroup(pc); VM_BUG_ON_PAGE(PageCgroupUsed(pc), page); /* * we don't need page_cgroup_lock for tail pages, because they are not @@@ -2841,16 -2728,22 +2728,22 @@@ } }
- pc->mem_cgroup = memcg; /* - * We access a page_cgroup asynchronously without lock_page_cgroup(). - * Especially when a page_cgroup is taken from a page, pc->mem_cgroup - * is accessed after testing USED bit. To make pc->mem_cgroup visible - * before USED bit, we need memory barrier here. - * See mem_cgroup_add_lru_list(), etc. + * Nobody should be changing or seriously looking at + * pc->mem_cgroup and pc->flags at this point: + * + * - the page is uncharged + * + * - the page is off-LRU + * + * - an anonymous fault has exclusive page access, except for + * a locked page table + * + * - a page cache insertion, a swapin fault, or a migration + * have the page locked */ - smp_wmb(); - SetPageCgroupUsed(pc); + pc->mem_cgroup = memcg; + pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0);
if (lrucare) { if (was_on_lru) { @@@ -2862,14 -2755,7 +2755,7 @@@ spin_unlock_irq(&zone->lru_lock); }
- if (ctype == MEM_CGROUP_CHARGE_TYPE_ANON) - anon = true; - else - anon = false; - - mem_cgroup_charge_statistics(memcg, page, anon, nr_pages); - unlock_page_cgroup(pc); - + mem_cgroup_charge_statistics(memcg, page, nr_pages); /* * "charge_statistics" updated event counter. Then, check it. * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. @@@ -2937,22 -2823,21 +2823,21 @@@ static int memcg_charge_kmem(struct mem if (ret) return ret;
- ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT, - oom_gfp_allowed(gfp)); + ret = try_charge(memcg, gfp, size >> PAGE_SHIFT); if (ret == -EINTR) { /* - * mem_cgroup_try_charge() chosed to bypass to root due to - * OOM kill or fatal signal. Since our only options are to - * either fail the allocation or charge it to this cgroup, do - * it as a temporary condition. But we can't fail. From a - * kmem/slab perspective, the cache has already been selected, - * by mem_cgroup_kmem_get_cache(), so it is too late to change + * try_charge() chose to bypass to root due to OOM kill or + * fatal signal. Since our only options are to either fail + * the allocation or charge it to this cgroup, do it as a + * temporary condition. But we can't fail. From a kmem/slab + * perspective, the cache has already been selected, by + * mem_cgroup_kmem_get_cache(), so it is too late to change * our minds. * * This condition will only trigger if the task entered - * memcg_charge_kmem in a sane state, but was OOM-killed during - * mem_cgroup_try_charge() above. Tasks that were already - * dying when the allocation triggers should have been already + * memcg_charge_kmem in a sane state, but was OOM-killed + * during try_charge() above. Tasks that were already dying + * when the allocation triggers should have been already * directed to the root cgroup in memcontrol.h */ res_counter_charge_nofail(&memcg->res, size, &fail_res); @@@ -3076,6 -2961,8 +2961,8 @@@ int memcg_update_cache_size(struct kmem return 0; }
+ static void memcg_unregister_cache_func(struct work_struct *work); + int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s, struct kmem_cache *root_cache) { @@@ -3097,6 -2984,9 +2984,9 @@@ if (memcg) { s->memcg_params->memcg = memcg; s->memcg_params->root_cache = root_cache; + atomic_long_set(&s->memcg_params->refcnt, 1); + INIT_WORK(&s->memcg_params->unregister_work, + memcg_unregister_cache_func); css_get(&memcg->css); } else s->memcg_params->is_root_cache = true; @@@ -3178,6 -3068,25 +3068,25 @@@ static void memcg_unregister_cache(stru kmem_cache_destroy(cachep); }
+ static void memcg_unregister_cache_func(struct work_struct *work) + { + struct memcg_cache_params *params = + container_of(work, struct memcg_cache_params, unregister_work); + struct kmem_cache *cachep = memcg_params_to_cache(params); + + mutex_lock(&memcg_slab_mutex); + memcg_unregister_cache(cachep); + mutex_unlock(&memcg_slab_mutex); + } + + static void memcg_unregister_cache_rcu_func(struct rcu_head *rcu) + { + struct memcg_cache_params *params = + container_of(rcu, struct memcg_cache_params, rcu_head); + + schedule_work(&params->unregister_work); + } + /* * During the creation of a new cache, we need to disable our accounting mechanism * altogether. This is true even if we are not creating, but rather just @@@ -3233,6 -3142,7 +3142,7 @@@ static void memcg_unregister_all_caches { struct kmem_cache *cachep; struct memcg_cache_params *params, *tmp; + LIST_HEAD(empty_caches);
if (!memcg_kmem_is_active(memcg)) return; @@@ -3240,9 -3150,31 +3150,31 @@@ mutex_lock(&memcg_slab_mutex); list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) { cachep = memcg_params_to_cache(params); + + memcg_cache_mark_dead(cachep); kmem_cache_shrink(cachep); - if (atomic_read(&cachep->memcg_params->nr_pages) == 0) - memcg_unregister_cache(cachep); + + if (atomic_long_dec_and_test(&cachep->memcg_params->refcnt)) + list_move(&cachep->memcg_params->list, &empty_caches); + } + + /* + * kmem_cache_free doesn't expect that the cache can be destroyed as + * soon as the object is freed, e.g. SLUB's implementation may want to + * update cache stats after putting the object on the free list. + * + * Therefore we should wait for all kmem_cache_free's to finish before + * proceeding to cache destruction. Since both SLAB and SLUB versions + * of kmem_cache_free are non-preemptable, we wait for an rcu-sched + * grace period to elapse. + */ + synchronize_sched(); + + while (!list_empty(&empty_caches)) { + params = list_first_entry(&empty_caches, + struct memcg_cache_params, list); + cachep = memcg_params_to_cache(params); + memcg_unregister_cache(cachep); } mutex_unlock(&memcg_slab_mutex); } @@@ -3315,14 -3247,18 +3247,18 @@@ int __memcg_charge_slab(struct kmem_cac res = memcg_charge_kmem(cachep->memcg_params->memcg, gfp, PAGE_SIZE << order); if (!res) - atomic_add(1 << order, &cachep->memcg_params->nr_pages); + atomic_long_inc(&cachep->memcg_params->refcnt); return res; }
void __memcg_uncharge_slab(struct kmem_cache *cachep, int order) { memcg_uncharge_kmem(cachep->memcg_params->memcg, PAGE_SIZE << order); - atomic_sub(1 << order, &cachep->memcg_params->nr_pages); + + if (unlikely(atomic_long_dec_and_test(&cachep->memcg_params->refcnt))) + /* see memcg_unregister_all_caches */ + call_rcu_sched(&cachep->memcg_params->rcu_head, + memcg_unregister_cache_rcu_func); }
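Note the context ladder behind this refcounting: the final put can happen inside kmem_cache_free(), the rcu-sched callback runs in softirq context where memcg_slab_mutex cannot be taken, so it merely schedules the work item, and the work item finally sleeps on the mutex and destroys the cache. Stripped of the memcg specifics, the pattern looks like this (generic sketch; struct obj, subsys_mutex, and destroy() are placeholders):

    static void obj_release_work(struct work_struct *work)
    {
            struct obj *o = container_of(work, struct obj, work);

            mutex_lock(&subsys_mutex);      /* process context: may sleep */
            destroy(o);
            mutex_unlock(&subsys_mutex);
    }

    static void obj_release_rcu(struct rcu_head *head)
    {
            struct obj *o = container_of(head, struct obj, rcu);

            schedule_work(&o->work);        /* softirq: cannot sleep, defer */
    }

    static void obj_put(struct obj *o)
    {
            /* the grace period waits out non-preemptible readers/freers */
            if (atomic_long_dec_and_test(&o->refcnt))
                    call_rcu_sched(&o->rcu, obj_release_rcu);
    }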
/* @@@ -3463,12 -3399,13 +3399,13 @@@ void __memcg_kmem_commit_charge(struct memcg_uncharge_kmem(memcg, PAGE_SIZE << order); return; } - + /* + * The page is freshly allocated and not visible to any + * outside callers yet. Set up pc non-atomically. + */ pc = lookup_page_cgroup(page); - lock_page_cgroup(pc); pc->mem_cgroup = memcg; - SetPageCgroupUsed(pc); - unlock_page_cgroup(pc); + pc->flags = PCG_USED; }
void __memcg_kmem_uncharge_pages(struct page *page, int order) @@@ -3478,19 -3415,11 +3415,11 @@@
pc = lookup_page_cgroup(page); if (!PageCgroupUsed(pc)) return;
- lock_page_cgroup(pc); - if (PageCgroupUsed(pc)) { - memcg = pc->mem_cgroup; - ClearPageCgroupUsed(pc); - } - unlock_page_cgroup(pc); + memcg = pc->mem_cgroup; + pc->flags = 0;
/* * We trust that only if there is a memcg associated with the page, it @@@ -3510,7 -3439,6 +3439,6 @@@ static inline void memcg_unregister_all
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- #define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION) /* * Because tail pages are not marked as "used", set it. We're under * zone->lru_lock, 'splitting on pmd' and compound_lock. @@@ -3531,8 -3459,7 +3459,7 @@@ void mem_cgroup_split_huge_fixup(struc for (i = 1; i < HPAGE_PMD_NR; i++) { pc = head_pc + i; pc->mem_cgroup = memcg; - smp_wmb();/* see __commit_charge() */ - pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; + pc->flags = head_pc->flags; } __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], HPAGE_PMD_NR); @@@ -3562,7 -3489,6 +3489,6 @@@ static int mem_cgroup_move_account(stru { unsigned long flags; int ret; - bool anon = PageAnon(page);
VM_BUG_ON(from == to); VM_BUG_ON_PAGE(PageLRU(page), page); @@@ -3576,15 -3502,13 +3502,13 @@@ if (nr_pages > 1 && !PageTransHuge(page)) goto out;
- lock_page_cgroup(pc); - ret = -EINVAL; if (!PageCgroupUsed(pc) || pc->mem_cgroup != from) - goto unlock; + goto out;
move_lock_mem_cgroup(from, &flags);
- if (!anon && page_mapped(page)) { + if (!PageAnon(page) && page_mapped(page)) { __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], nr_pages); __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], @@@ -3598,15 -3522,19 +3522,19 @@@ nr_pages); }
- mem_cgroup_charge_statistics(from, page, anon, -nr_pages); + mem_cgroup_charge_statistics(from, page, -nr_pages); + + /* + * It is safe to change pc->mem_cgroup here because the page + * is referenced, charged, and isolated - we can't race with + * uncharging, charging, migration, or LRU putback. + */
/* caller should have done css_get */ pc->mem_cgroup = to; - mem_cgroup_charge_statistics(to, page, anon, nr_pages); + mem_cgroup_charge_statistics(to, page, nr_pages); move_unlock_mem_cgroup(from, &flags); ret = 0; - unlock: - unlock_page_cgroup(pc); /* * check events */ @@@ -3682,357 -3610,6 +3610,6 @@@ out return ret; }
- int mem_cgroup_charge_anon(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask) - { - unsigned int nr_pages = 1; - struct mem_cgroup *memcg; - bool oom = true; - - if (mem_cgroup_disabled()) - return 0; - - VM_BUG_ON_PAGE(page_mapped(page), page); - VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page); - VM_BUG_ON(!mm); - - if (PageTransHuge(page)) { - nr_pages <<= compound_order(page); - VM_BUG_ON_PAGE(!PageTransHuge(page), page); - /* - * Never OOM-kill a process for a huge page. The - * fault handler will fall back to regular pages. - */ - oom = false; - } - - memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom); - if (!memcg) - return -ENOMEM; - __mem_cgroup_commit_charge(memcg, page, nr_pages, - MEM_CGROUP_CHARGE_TYPE_ANON, false); - return 0; - } - - /* - * While swap-in, try_charge -> commit or cancel, the page is locked. - * And when try_charge() successfully returns, one refcnt to memcg without - * struct page_cgroup is acquired. This refcnt will be consumed by - * "commit()" or removed by "cancel()" - */ - static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm, - struct page *page, - gfp_t mask, - struct mem_cgroup **memcgp) - { - struct mem_cgroup *memcg = NULL; - struct page_cgroup *pc; - int ret; - - pc = lookup_page_cgroup(page); - /* - * Every swap fault against a single page tries to charge the - * page, bail as early as possible. shmem_unuse() encounters - * already charged pages, too. The USED bit is protected by - * the page lock, which serializes swap cache removal, which - * in turn serializes uncharging. - */ - if (PageCgroupUsed(pc)) - goto out; - if (do_swap_account) - memcg = try_get_mem_cgroup_from_page(page); - if (!memcg) - memcg = get_mem_cgroup_from_mm(mm); - ret = mem_cgroup_try_charge(memcg, mask, 1, true); - css_put(&memcg->css); - if (ret == -EINTR) - memcg = root_mem_cgroup; - else if (ret) - return ret; - out: - *memcgp = memcg; - return 0; - } - - int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page, - gfp_t gfp_mask, struct mem_cgroup **memcgp) - { - if (mem_cgroup_disabled()) { - *memcgp = NULL; - return 0; - } - /* - * A racing thread's fault, or swapoff, may have already - * updated the pte, and even removed page from swap cache: in - * those cases unuse_pte()'s pte_same() test will fail; but - * there's also a KSM case which does need to charge the page. - */ - if (!PageSwapCache(page)) { - struct mem_cgroup *memcg; - - memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true); - if (!memcg) - return -ENOMEM; - *memcgp = memcg; - return 0; - } - return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp); - } - - void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg) - { - if (mem_cgroup_disabled()) - return; - if (!memcg) - return; - __mem_cgroup_cancel_charge(memcg, 1); - } - - static void - __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg, - enum charge_type ctype) - { - if (mem_cgroup_disabled()) - return; - if (!memcg) - return; - - __mem_cgroup_commit_charge(memcg, page, 1, ctype, true); - /* - * Now swap is on-memory. This means this page may be - * counted both as mem and swap....double count. - * Fix it by uncharging from memsw. Basically, this SwapCache is stable - * under lock_page(). But in do_swap_page()::memory.c, reuse_swap_page() - * may call delete_from_swap_cache() before reach here. 
- */ - if (do_swap_account && PageSwapCache(page)) { - swp_entry_t ent = {.val = page_private(page)}; - mem_cgroup_uncharge_swap(ent); - } - } - - void mem_cgroup_commit_charge_swapin(struct page *page, - struct mem_cgroup *memcg) - { - __mem_cgroup_commit_charge_swapin(page, memcg, - MEM_CGROUP_CHARGE_TYPE_ANON); - } - - int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask) - { - enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; - struct mem_cgroup *memcg; - int ret; - - if (mem_cgroup_disabled()) - return 0; - if (PageCompound(page)) - return 0; - - if (PageSwapCache(page)) { /* shmem */ - ret = __mem_cgroup_try_charge_swapin(mm, page, - gfp_mask, &memcg); - if (ret) - return ret; - __mem_cgroup_commit_charge_swapin(page, memcg, type); - return 0; - } - - memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true); - if (!memcg) - return -ENOMEM; - __mem_cgroup_commit_charge(memcg, page, 1, type, false); - return 0; - } - - static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg, - unsigned int nr_pages, - const enum charge_type ctype) - { - struct memcg_batch_info *batch = NULL; - bool uncharge_memsw = true; - - /* If swapout, usage of swap doesn't decrease */ - if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) - uncharge_memsw = false; - - batch = ¤t->memcg_batch; - /* - * In usual, we do css_get() when we remember memcg pointer. - * But in this case, we keep res->usage until end of a series of - * uncharges. Then, it's ok to ignore memcg's refcnt. - */ - if (!batch->memcg) - batch->memcg = memcg; - /* - * do_batch > 0 when unmapping pages or inode invalidate/truncate. - * In those cases, all pages freed continuously can be expected to be in - * the same cgroup and we have chance to coalesce uncharges. - * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE) - * because we want to do uncharge as soon as possible. - */ - - if (!batch->do_batch || test_thread_flag(TIF_MEMDIE)) - goto direct_uncharge; - - if (nr_pages > 1) - goto direct_uncharge; - - /* - * In typical case, batch->memcg == mem. This means we can - * merge a series of uncharges to an uncharge of res_counter. - * If not, we uncharge res_counter ony by one. - */ - if (batch->memcg != memcg) - goto direct_uncharge; - /* remember freed charge and uncharge it later */ - batch->nr_pages++; - if (uncharge_memsw) - batch->memsw_nr_pages++; - return; - direct_uncharge: - res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE); - if (uncharge_memsw) - res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE); - if (unlikely(batch->memcg != memcg)) - memcg_oom_recover(memcg); - } - - /* - * uncharge if !page_mapped(page) - */ - static struct mem_cgroup * - __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype, - bool end_migration) - { - struct mem_cgroup *memcg = NULL; - unsigned int nr_pages = 1; - struct page_cgroup *pc; - bool anon; - - if (mem_cgroup_disabled()) - return NULL; - - if (PageTransHuge(page)) { - nr_pages <<= compound_order(page); - VM_BUG_ON_PAGE(!PageTransHuge(page), page); - } - /* - * Check if our page_cgroup is valid - */ - pc = lookup_page_cgroup(page); - if (unlikely(!PageCgroupUsed(pc))) - return NULL; - - lock_page_cgroup(pc); - - memcg = pc->mem_cgroup; - - if (!PageCgroupUsed(pc)) - goto unlock_out; - - anon = PageAnon(page); - - switch (ctype) { - case MEM_CGROUP_CHARGE_TYPE_ANON: - /* - * Generally PageAnon tells if it's the anon statistics to be - * updated; but sometimes e.g. 
mem_cgroup_uncharge_page() is - * used before page reached the stage of being marked PageAnon. - */ - anon = true; - /* fallthrough */ - case MEM_CGROUP_CHARGE_TYPE_DROP: - /* See mem_cgroup_prepare_migration() */ - if (page_mapped(page)) - goto unlock_out; - /* - * Pages under migration may not be uncharged. But - * end_migration() /must/ be the one uncharging the - * unused post-migration page and so it has to call - * here with the migration bit still set. See the - * res_counter handling below. - */ - if (!end_migration && PageCgroupMigration(pc)) - goto unlock_out; - break; - case MEM_CGROUP_CHARGE_TYPE_SWAPOUT: - if (!PageAnon(page)) { /* Shared memory */ - if (page->mapping && !page_is_file_cache(page)) - goto unlock_out; - } else if (page_mapped(page)) /* Anon */ - goto unlock_out; - break; - default: - break; - } - - mem_cgroup_charge_statistics(memcg, page, anon, -nr_pages); - - ClearPageCgroupUsed(pc); - /* - * pc->mem_cgroup is not cleared here. It will be accessed when it's - * freed from LRU. This is safe because uncharged page is expected not - * to be reused (freed soon). Exception is SwapCache, it's handled by - * special functions. - */ - - unlock_page_cgroup(pc); - /* - * even after unlock, we have memcg->res.usage here and this memcg - * will never be freed, so it's safe to call css_get(). - */ - memcg_check_events(memcg, page); - if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) { - mem_cgroup_swap_statistics(memcg, true); - css_get(&memcg->css); - } - /* - * Migration does not charge the res_counter for the - * replacement page, so leave it alone when phasing out the - * page that is unused after the migration. - */ - if (!end_migration && !mem_cgroup_is_root(memcg)) - mem_cgroup_do_uncharge(memcg, nr_pages, ctype); - - return memcg; - - unlock_out: - unlock_page_cgroup(pc); - return NULL; - } - - void mem_cgroup_uncharge_page(struct page *page) - { - /* early check. */ - if (page_mapped(page)) - return; - VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page); - /* - * If the page is in swap cache, uncharge should be deferred - * to the swap path, which also properly accounts swap usage - * and handles memcg lifetime. - * - * Note that this check is not stable and reclaim may add the - * page to swap cache at any time after this. However, if the - * page is not in swap cache by the time page->mapcount hits - * 0, there won't be any page table references to the swap - * slot, and reclaim will free it and not actually write the - * page to disk. - */ - if (PageSwapCache(page)) - return; - __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false); - } - - void mem_cgroup_uncharge_cache_page(struct page *page) - { - VM_BUG_ON_PAGE(page_mapped(page), page); - VM_BUG_ON_PAGE(page->mapping, page); - __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false); - } - /* * Batch_start/batch_end is called in unmap_page_range/invlidate/trucate. * In that cases, pages are freed continuously and we can expect pages @@@ -4080,58 -3657,12 +3657,12 @@@ void mem_cgroup_uncharge_end(void batch->memcg = NULL; }
- #ifdef CONFIG_SWAP - /* - * called after __delete_from_swap_cache() and drop "page" account. - * memcg information is recorded to swap_cgroup of "ent" - */ - void - mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) - { - struct mem_cgroup *memcg; - int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT; - - if (!swapout) /* this was a swap cache but the swap is unused ! */ - ctype = MEM_CGROUP_CHARGE_TYPE_DROP; - - memcg = __mem_cgroup_uncharge_common(page, ctype, false); - - /* - * record memcg information, if swapout && memcg != NULL, - * css_get() was called in uncharge(). - */ - if (do_swap_account && swapout && memcg) - swap_cgroup_record(ent, mem_cgroup_id(memcg)); - } - #endif - #ifdef CONFIG_MEMCG_SWAP - /* - * called from swap_entry_free(). remove record in swap_cgroup and - * uncharge "memsw" account. - */ - void mem_cgroup_uncharge_swap(swp_entry_t ent) + static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, + bool charge) { - struct mem_cgroup *memcg; - unsigned short id; - - if (!do_swap_account) - return; - - id = swap_cgroup_record(ent, 0); - rcu_read_lock(); - memcg = mem_cgroup_lookup(id); - if (memcg) { - /* - * We uncharge this because swap is freed. This memcg can - * be obsolete one. We avoid calling css_tryget_online(). - */ - if (!mem_cgroup_is_root(memcg)) - res_counter_uncharge(&memcg->memsw, PAGE_SIZE); - mem_cgroup_swap_statistics(memcg, false); - css_put(&memcg->css); - } - rcu_read_unlock(); + int val = (charge) ? 1 : -1; + this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val); }
/** @@@ -4177,180 -3708,11 +3708,11 @@@ static int mem_cgroup_move_swap_account } #else static inline int mem_cgroup_move_swap_account(swp_entry_t entry, - struct mem_cgroup *from, struct mem_cgroup *to) - { - return -EINVAL; - } - #endif - - /* - * Before starting migration, account PAGE_SIZE to mem_cgroup that the old - * page belongs to. - */ - void mem_cgroup_prepare_migration(struct page *page, struct page *newpage, - struct mem_cgroup **memcgp) - { - struct mem_cgroup *memcg = NULL; - unsigned int nr_pages = 1; - struct page_cgroup *pc; - enum charge_type ctype; - - *memcgp = NULL; - - if (mem_cgroup_disabled()) - return; - - if (PageTransHuge(page)) - nr_pages <<= compound_order(page); - - pc = lookup_page_cgroup(page); - lock_page_cgroup(pc); - if (PageCgroupUsed(pc)) { - memcg = pc->mem_cgroup; - css_get(&memcg->css); - /* - * At migrating an anonymous page, its mapcount goes down - * to 0 and uncharge() will be called. But, even if it's fully - * unmapped, migration may fail and this page has to be - * charged again. We set MIGRATION flag here and delay uncharge - * until end_migration() is called - * - * Corner Case Thinking - * A) - * When the old page was mapped as Anon and it's unmap-and-freed - * while migration was ongoing. - * If unmap finds the old page, uncharge() of it will be delayed - * until end_migration(). If unmap finds a new page, it's - * uncharged when it make mapcount to be 1->0. If unmap code - * finds swap_migration_entry, the new page will not be mapped - * and end_migration() will find it(mapcount==0). - * - * B) - * When the old page was mapped but migraion fails, the kernel - * remaps it. A charge for it is kept by MIGRATION flag even - * if mapcount goes down to 0. We can do remap successfully - * without charging it again. - * - * C) - * The "old" page is under lock_page() until the end of - * migration, so, the old page itself will not be swapped-out. - * If the new page is swapped out before end_migraton, our - * hook to usual swap-out path will catch the event. - */ - if (PageAnon(page)) - SetPageCgroupMigration(pc); - } - unlock_page_cgroup(pc); - /* - * If the page is not charged at this point, - * we return here. - */ - if (!memcg) - return; - - *memcgp = memcg; - /* - * We charge new page before it's used/mapped. So, even if unlock_page() - * is called before end_migration, we can catch all events on this new - * page. In the case new page is migrated but not remapped, new page's - * mapcount will be finally 0 and we call uncharge in end_migration(). - */ - if (PageAnon(page)) - ctype = MEM_CGROUP_CHARGE_TYPE_ANON; - else - ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; - /* - * The page is committed to the memcg, but it's not actually - * charged to the res_counter since we plan on replacing the - * old one and only one page is going to be left afterwards. - */ - __mem_cgroup_commit_charge(memcg, newpage, nr_pages, ctype, false); - } - - /* remove redundant charge if migration failed*/ - void mem_cgroup_end_migration(struct mem_cgroup *memcg, - struct page *oldpage, struct page *newpage, bool migration_ok) - { - struct page *used, *unused; - struct page_cgroup *pc; - bool anon; - - if (!memcg) - return; - - if (!migration_ok) { - used = oldpage; - unused = newpage; - } else { - used = newpage; - unused = oldpage; - } - anon = PageAnon(used); - __mem_cgroup_uncharge_common(unused, - anon ? 
MEM_CGROUP_CHARGE_TYPE_ANON - : MEM_CGROUP_CHARGE_TYPE_CACHE, - true); - css_put(&memcg->css); - /* - * We disallowed uncharge of pages under migration because mapcount - * of the page goes down to zero, temporarly. - * Clear the flag and check the page should be charged. - */ - pc = lookup_page_cgroup(oldpage); - lock_page_cgroup(pc); - ClearPageCgroupMigration(pc); - unlock_page_cgroup(pc); - - /* - * If a page is a file cache, radix-tree replacement is very atomic - * and we can skip this check. When it was an Anon page, its mapcount - * goes down to 0. But because we added MIGRATION flage, it's not - * uncharged yet. There are several case but page->mapcount check - * and USED bit check in mem_cgroup_uncharge_page() will do enough - * check. (see prepare_charge() also) - */ - if (anon) - mem_cgroup_uncharge_page(used); - } - - /* - * At replace page cache, newpage is not under any memcg but it's on - * LRU. So, this function doesn't touch res_counter but handles LRU - * in correct way. Both pages are locked so we cannot race with uncharge. - */ - void mem_cgroup_replace_page_cache(struct page *oldpage, - struct page *newpage) - { - struct mem_cgroup *memcg = NULL; - struct page_cgroup *pc; - enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; - - if (mem_cgroup_disabled()) - return; - - pc = lookup_page_cgroup(oldpage); - /* fix accounting on old pages */ - lock_page_cgroup(pc); - if (PageCgroupUsed(pc)) { - memcg = pc->mem_cgroup; - mem_cgroup_charge_statistics(memcg, oldpage, false, -1); - ClearPageCgroupUsed(pc); - } - unlock_page_cgroup(pc); - - /* - * When called from shmem_replace_page(), in some cases the - * oldpage has already been charged, and in some cases not. - */ - if (!memcg) - return; - /* - * Even if newpage->mapping was NULL before starting replacement, - * the newpage may be on LRU(or pagevec for LRU) already. We lock - * LRU while we overwrite pc->mem_cgroup. - */ - __mem_cgroup_commit_charge(memcg, newpage, 1, type, true); + struct mem_cgroup *from, struct mem_cgroup *to) + { + return -EINVAL; } + #endif
#ifdef CONFIG_DEBUG_VM static struct page_cgroup *lookup_page_cgroup_used(struct page *page) @@@ -4817,78 -4179,24 +4179,24 @@@ out return retval; }
- - static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg, - enum mem_cgroup_stat_index idx) - { - struct mem_cgroup *iter; - long val = 0; - - /* Per-cpu values can be negative, use a signed accumulator */ - for_each_mem_cgroup_tree(iter, memcg) - val += mem_cgroup_read_stat(iter, idx); - - if (val < 0) /* race ? */ - val = 0; - return val; - } - - static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) - { - u64 val; - - if (!mem_cgroup_is_root(memcg)) { - if (!swap) - return res_counter_read_u64(&memcg->res, RES_USAGE); - else - return res_counter_read_u64(&memcg->memsw, RES_USAGE); - } - - /* - * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS - * as well as in MEM_CGROUP_STAT_RSS_HUGE. - */ - val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE); - val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS); - - if (swap) - val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP); - - return val << PAGE_SHIFT; - } - static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, - struct cftype *cft) + struct cftype *cft) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); - u64 val; - int name; - enum res_type type; - - type = MEMFILE_TYPE(cft->private); - name = MEMFILE_ATTR(cft->private); + enum res_type type = MEMFILE_TYPE(cft->private); + int name = MEMFILE_ATTR(cft->private);
switch (type) { case _MEM: - if (name == RES_USAGE) - val = mem_cgroup_usage(memcg, false); - else - val = res_counter_read_u64(&memcg->res, name); - break; + return res_counter_read_u64(&memcg->res, name); case _MEMSWAP: - if (name == RES_USAGE) - val = mem_cgroup_usage(memcg, true); - else - val = res_counter_read_u64(&memcg->memsw, name); - break; + return res_counter_read_u64(&memcg->memsw, name); case _KMEM: - val = res_counter_read_u64(&memcg->kmem, name); + return res_counter_read_u64(&memcg->kmem, name); break; default: BUG(); } - - return val; }
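The MEMFILE_TYPE()/MEMFILE_ATTR() pair unpacks cftype->private, which packs the counter type into the high bits and the RES_* attribute into the low bits. The macros are defined earlier in memcontrol.c, outside these hunks, essentially as:

    #define MEMFILE_PRIVATE(x, val) ((x) << 16 | (val))
    #define MEMFILE_TYPE(val)       ((val) >> 16 & 0xffff)
    #define MEMFILE_ATTR(val)       ((val) & 0xffff)

so a cftype entry such as .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE) routes straight to res_counter_read_u64(&memcg->memsw, RES_USAGE) above.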
#ifdef CONFIG_MEMCG_KMEM @@@ -5350,7 -4658,10 +4658,10 @@@ static void __mem_cgroup_threshold(stru if (!t) goto unlock;
- usage = mem_cgroup_usage(memcg, swap); + if (!swap) + usage = res_counter_read_u64(&memcg->res, RES_USAGE); + else + usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
/* * current_threshold points to threshold just below or equal to usage. @@@ -5442,15 -4753,15 +4753,15 @@@ static int __mem_cgroup_usage_register_
mutex_lock(&memcg->thresholds_lock);
- if (type == _MEM) + if (type == _MEM) { thresholds = &memcg->thresholds; - else if (type == _MEMSWAP) + usage = res_counter_read_u64(&memcg->res, RES_USAGE); + } else if (type == _MEMSWAP) { thresholds = &memcg->memsw_thresholds; - else + usage = res_counter_read_u64(&memcg->memsw, RES_USAGE); + } else BUG();
- usage = mem_cgroup_usage(memcg, type == _MEMSWAP); - /* Check if a threshold crossed before adding a new one */ if (thresholds->primary) __mem_cgroup_threshold(memcg, type == _MEMSWAP); @@@ -5530,18 -4841,19 +4841,19 @@@ static void __mem_cgroup_usage_unregist int i, j, size;
mutex_lock(&memcg->thresholds_lock); - if (type == _MEM) + + if (type == _MEM) { thresholds = &memcg->thresholds; - else if (type == _MEMSWAP) + usage = res_counter_read_u64(&memcg->res, RES_USAGE); + } else if (type == _MEMSWAP) { thresholds = &memcg->memsw_thresholds; - else + usage = res_counter_read_u64(&memcg->memsw, RES_USAGE); + } else BUG();
if (!thresholds->primary) goto unlock;
- usage = mem_cgroup_usage(memcg, type == _MEMSWAP); - /* Check if a threshold crossed before removing */ __mem_cgroup_threshold(memcg, type == _MEMSWAP);
@@@ -6296,9 -5608,9 +5608,9 @@@ mem_cgroup_css_online(struct cgroup_sub * core guarantees its existence. */ } else { - res_counter_init(&memcg->res, NULL); - res_counter_init(&memcg->memsw, NULL); - res_counter_init(&memcg->kmem, NULL); + res_counter_init(&memcg->res, &root_mem_cgroup->res); + res_counter_init(&memcg->memsw, &root_mem_cgroup->memsw); + res_counter_init(&memcg->kmem, &root_mem_cgroup->kmem); /* * Deeper hierarchy with use_hierarchy == false doesn't make * much sense, so let the cgroup subsystem know about this @@@ -6407,80 -5719,40 +5719,63 @@@ static void mem_cgroup_css_free(struct __mem_cgroup_free(memcg); }
+/** + * mem_cgroup_css_reset - reset the states of a mem_cgroup + * @css: the target css + * + * Reset the states of the mem_cgroup associated with @css. This is + * invoked when the userland requests disabling on the default hierarchy + * but the memcg is pinned through dependency. The memcg should stop + * applying policies and should revert to the vanilla state as it may be + * made visible again. + * + * The current implementation only resets the essential configurations. + * This needs to be expanded to cover all the visible parts. + */ +static void mem_cgroup_css_reset(struct cgroup_subsys_state *css) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + + mem_cgroup_resize_limit(memcg, ULLONG_MAX); + mem_cgroup_resize_memsw_limit(memcg, ULLONG_MAX); + memcg_update_kmem_limit(memcg, ULLONG_MAX); + res_counter_set_soft_limit(&memcg->res, ULLONG_MAX); +} + #ifdef CONFIG_MMU /* Handlers for move charge at task migration. */ - #define PRECHARGE_COUNT_AT_ONCE 256 static int mem_cgroup_do_precharge(unsigned long count) { - int ret = 0; - int batch_count = PRECHARGE_COUNT_AT_ONCE; - struct mem_cgroup *memcg = mc.to; + int ret;
- if (mem_cgroup_is_root(memcg)) { + /* Try a single bulk charge without reclaim first */ + ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count); + if (!ret) { mc.precharge += count; - /* we don't need css_get for root */ return ret; } - /* try to charge at once */ - if (count > 1) { - struct res_counter *dummy; - /* - * "memcg" cannot be under rmdir() because we've already checked - * by cgroup_lock_live_cgroup() that it is not removed and we - * are still under the same cgroup_mutex. So we can postpone - * css_get(). - */ - if (res_counter_charge(&memcg->res, PAGE_SIZE * count, &dummy)) - goto one_by_one; - if (do_swap_account && res_counter_charge(&memcg->memsw, - PAGE_SIZE * count, &dummy)) { - res_counter_uncharge(&memcg->res, PAGE_SIZE * count); - goto one_by_one; - } - mc.precharge += count; + if (ret == -EINTR) { + cancel_charge(root_mem_cgroup, count); return ret; } - one_by_one: - /* fall back to one by one charge */ + + /* Try charges one by one with reclaim */ while (count--) { - if (signal_pending(current)) { - ret = -EINTR; - break; - } - if (!batch_count--) { - batch_count = PRECHARGE_COUNT_AT_ONCE; - cond_resched(); - } - ret = mem_cgroup_try_charge(memcg, GFP_KERNEL, 1, false); + ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1); + /* + * In case of failure, any residual charges against + * mc.to will be dropped by mem_cgroup_clear_mc() + * later on. However, cancel any charges that are + * bypassed to root right away or they'll be lost. + */ + if (ret == -EINTR) + cancel_charge(root_mem_cgroup, 1); if (ret) - /* mem_cgroup_clear_mc() will do uncharge later */ return ret; mc.precharge++; + cond_resched(); } - return ret; + return 0; }
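The rewritten precharge keeps the old two-speed strategy but expresses it through try_charge()'s gfp mask: one opportunistic bulk charge with reclaim disabled, then, if that fails, single-page charges that are allowed to reclaim. As a self-contained model of the control flow (try_reserve_bulk() and try_reserve_one() are hypothetical stand-ins for the two try_charge() variants):

    #include <errno.h>
    #include <stdbool.h>

    static bool try_reserve_bulk(long count) { return count <= 8; } /* stub */
    static bool try_reserve_one(void) { return true; }              /* stub */

    static int precharge(long count)
    {
            /* cheap attempt first: all-or-nothing, no expensive work */
            if (try_reserve_bulk(count))
                    return 0;

            /* slow path: one at a time, expensive work (reclaim) allowed */
            while (count-- > 0)
                    if (!try_reserve_one())
                            return -ENOMEM; /* caller unwinds partial state */
            return 0;
    }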
/** @@@ -6616,9 -5888,9 +5911,9 @@@ static enum mc_target_type get_mctgt_ty if (page) { pc = lookup_page_cgroup(page); /* - * Do only loose check w/o page_cgroup lock. - * mem_cgroup_move_account() checks the pc is valid or not under - * the lock. + * Do only a loose check w/o serialization; + * mem_cgroup_move_account() checks whether the pc is + * valid under LRU exclusion. */ if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) { ret = MC_TARGET_PAGE; @@@ -6743,7 -6015,7 +6038,7 @@@ static void __mem_cgroup_clear_mc(void
/* we must uncharge all the leftover precharges from mc.to */ if (mc.precharge) { - __mem_cgroup_cancel_charge(mc.to, mc.precharge); + cancel_charge(mc.to, mc.precharge); mc.precharge = 0; } /* @@@ -6751,27 -6023,24 +6046,24 @@@ * we must uncharge here. */ if (mc.moved_charge) { - __mem_cgroup_cancel_charge(mc.from, mc.moved_charge); + cancel_charge(mc.from, mc.moved_charge); mc.moved_charge = 0; } /* we must fixup refcnts and charges */ if (mc.moved_swap) { /* uncharge swap account from the old cgroup */ - if (!mem_cgroup_is_root(mc.from)) - res_counter_uncharge(&mc.from->memsw, - PAGE_SIZE * mc.moved_swap); + res_counter_uncharge(&mc.from->memsw, + PAGE_SIZE * mc.moved_swap);
for (i = 0; i < mc.moved_swap; i++) css_put(&mc.from->css);
- if (!mem_cgroup_is_root(mc.to)) { - /* - * we charged both to->res and to->memsw, so we should - * uncharge to->res. - */ - res_counter_uncharge(&mc.to->res, - PAGE_SIZE * mc.moved_swap); - } + /* + * we charged both to->res and to->memsw, so we should + * uncharge to->res. + */ + res_counter_uncharge(&mc.to->res, + PAGE_SIZE * mc.moved_swap); /* we've already done css_get(mc.to) */ mc.moved_swap = 0; } @@@ -7042,7 -6311,6 +6334,7 @@@ struct cgroup_subsys memory_cgrp_subsy .css_online = mem_cgroup_css_online, .css_offline = mem_cgroup_css_offline, .css_free = mem_cgroup_css_free, + .css_reset = mem_cgroup_css_reset, .can_attach = mem_cgroup_can_attach, .cancel_attach = mem_cgroup_cancel_attach, .attach = mem_cgroup_move_task, @@@ -7081,6 -6349,321 +6373,321 @@@ static void __init enable_swap_cgroup(v } #endif
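The memcontrol.c hunks that follow replace the old type-specific charge entry points (anon, file, swapin) with a single try/commit/cancel transaction. Going by the kernel-doc below, every new call site takes the same shape regardless of page type (hypothetical caller; install_page() stands in for whatever sets up page->mapping):

    struct mem_cgroup *memcg;
    int err;

    err = mem_cgroup_try_charge(page, mm, gfp_mask, &memcg);
    if (err)
            return err;                     /* reclaim failed: -ENOMEM */
    err = install_page(page);               /* hypothetical: set up mapping */
    if (err) {
            mem_cgroup_cancel_charge(page, memcg);
            return err;
    }
    mem_cgroup_commit_charge(page, memcg, false);   /* page not on LRU yet */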
+ #ifdef CONFIG_MEMCG_SWAP + /** + * mem_cgroup_swapout - transfer a memsw charge to swap + * @page: page whose memsw charge to transfer + * @entry: swap entry to move the charge to + * + * Transfer the memsw charge of @page to @entry. + */ + void mem_cgroup_swapout(struct page *page, swp_entry_t entry) + { + struct page_cgroup *pc; + unsigned short oldid; + + VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_PAGE(page_count(page), page); + + if (!do_swap_account) + return; + + pc = lookup_page_cgroup(page); + + /* Readahead page, never charged */ + if (!PageCgroupUsed(pc)) + return; + + VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page); + + oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup)); + VM_BUG_ON_PAGE(oldid, page); + + pc->flags &= ~PCG_MEMSW; + css_get(&pc->mem_cgroup->css); + mem_cgroup_swap_statistics(pc->mem_cgroup, true); + } + + /** + * mem_cgroup_uncharge_swap - uncharge a swap entry + * @entry: swap entry to uncharge + * + * Drop the memsw charge associated with @entry. + */ + void mem_cgroup_uncharge_swap(swp_entry_t entry) + { + struct mem_cgroup *memcg; + unsigned short id; + + if (!do_swap_account) + return; + + id = swap_cgroup_record(entry, 0); + rcu_read_lock(); + memcg = mem_cgroup_lookup(id); + if (memcg) { + res_counter_uncharge(&memcg->memsw, PAGE_SIZE); + mem_cgroup_swap_statistics(memcg, false); + css_put(&memcg->css); + } + rcu_read_unlock(); + } + #endif + + /** + * mem_cgroup_try_charge - try charging a page + * @page: page to charge + * @mm: mm context of the victim + * @gfp_mask: reclaim mode + * @memcgp: charged memcg return + * + * Try to charge @page to the memcg that @mm belongs to, reclaiming + * pages according to @gfp_mask if necessary. + * + * Returns 0 on success, with *@memcgp pointing to the charged memcg. + * Otherwise, an error code is returned. + * + * After page->mapping has been set up, the caller must finalize the + * charge with mem_cgroup_commit_charge(). Or abort the transaction + * with mem_cgroup_cancel_charge() in case page instantiation fails. + */ + int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask, struct mem_cgroup **memcgp) + { + struct mem_cgroup *memcg = NULL; + unsigned int nr_pages = 1; + int ret = 0; + + if (mem_cgroup_disabled()) + goto out; + + if (PageSwapCache(page)) { + struct page_cgroup *pc = lookup_page_cgroup(page); + /* + * Every swap fault against a single page tries to charge the + * page, bail as early as possible. shmem_unuse() encounters + * already charged pages, too. The USED bit is protected by + * the page lock, which serializes swap cache removal, which + * in turn serializes uncharging. + */ + if (PageCgroupUsed(pc)) + goto out; + } + + if (PageTransHuge(page)) { + nr_pages <<= compound_order(page); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); + } + + if (do_swap_account && PageSwapCache(page)) + memcg = try_get_mem_cgroup_from_page(page); + if (!memcg) + memcg = get_mem_cgroup_from_mm(mm); + + ret = try_charge(memcg, gfp_mask, nr_pages); + + css_put(&memcg->css); + + if (ret == -EINTR) { + memcg = root_mem_cgroup; + ret = 0; + } + out: + *memcgp = memcg; + return ret; + } + + /** + * mem_cgroup_commit_charge - commit a page charge + * @page: page to charge + * @memcg: memcg to charge the page to + * @lrucare: page might be on LRU already + * + * Finalize a charge transaction started by mem_cgroup_try_charge(), + * after page->mapping has been set up. This must happen atomically + * as part of the page instantiation, i.e. 
under the page table lock + * for anonymous pages, under the page lock for page and swap cache. + * + * In addition, the page must not be on the LRU during the commit, to + * prevent racing with task migration. If it might be, use @lrucare. + * + * Use mem_cgroup_cancel_charge() to cancel the transaction instead. + */ + void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, + bool lrucare) + { + unsigned int nr_pages = 1; + + VM_BUG_ON_PAGE(!page->mapping, page); + VM_BUG_ON_PAGE(PageLRU(page) && !lrucare, page); + + if (mem_cgroup_disabled()) + return; + /* + * Swap faults will attempt to charge the same page multiple + * times. But reuse_swap_page() might have removed the page + * from swapcache already, so we can't check PageSwapCache(). + */ + if (!memcg) + return; + + if (PageTransHuge(page)) { + nr_pages <<= compound_order(page); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); + } + + commit_charge(page, memcg, nr_pages, lrucare); + + if (do_swap_account && PageSwapCache(page)) { + swp_entry_t entry = { .val = page_private(page) }; + /* + * The swap entry might not get freed for a long time, + * let's not wait for it. The page already received a + * memory+swap charge, drop the swap entry duplicate. + */ + mem_cgroup_uncharge_swap(entry); + } + } + + /** + * mem_cgroup_cancel_charge - cancel a page charge + * @page: page to charge + * @memcg: memcg to charge the page to + * + * Cancel a charge transaction started by mem_cgroup_try_charge(). + */ + void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg) + { + unsigned int nr_pages = 1; + + if (mem_cgroup_disabled()) + return; + /* + * Swap faults will attempt to charge the same page multiple + * times. But reuse_swap_page() might have removed the page + * from swapcache already, so we can't check PageSwapCache(). + */ + if (!memcg) + return; + + if (PageTransHuge(page)) { + nr_pages <<= compound_order(page); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); + } + + cancel_charge(memcg, nr_pages); + } + + /** + * mem_cgroup_uncharge - uncharge a page + * @page: page to uncharge + * + * Uncharge a page previously charged with mem_cgroup_try_charge() and + * mem_cgroup_commit_charge(). + */ + void mem_cgroup_uncharge(struct page *page) + { + struct memcg_batch_info *batch; + unsigned int nr_pages = 1; + struct mem_cgroup *memcg; + struct page_cgroup *pc; + unsigned long flags; + + VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_PAGE(page_count(page), page); + + if (mem_cgroup_disabled()) + return; + + pc = lookup_page_cgroup(page); + + /* Every final put_page() ends up here */ + if (!PageCgroupUsed(pc)) + return; + + if (PageTransHuge(page)) { + nr_pages <<= compound_order(page); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); + } + /* + * Nobody should be changing or seriously looking at + * pc->mem_cgroup and pc->flags at this point, we have fully + * exclusive access to the page. 
+ */ + memcg = pc->mem_cgroup; + flags = pc->flags; + pc->flags = 0; + + mem_cgroup_charge_statistics(memcg, page, -nr_pages); + memcg_check_events(memcg, page); + + batch = ¤t->memcg_batch; + if (!batch->memcg) + batch->memcg = memcg; + else if (batch->memcg != memcg) + goto uncharge; + if (nr_pages > 1) + goto uncharge; + if (!batch->do_batch) + goto uncharge; + if (test_thread_flag(TIF_MEMDIE)) + goto uncharge; + if (flags & PCG_MEM) + batch->nr_pages++; + if (flags & PCG_MEMSW) + batch->memsw_nr_pages++; + return; + uncharge: + if (flags & PCG_MEM) + res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE); + if (flags & PCG_MEMSW) + res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE); + if (batch->memcg != memcg) + memcg_oom_recover(memcg); + } + + /** + * mem_cgroup_migrate - migrate a charge to another page + * @oldpage: currently charged page + * @newpage: page to transfer the charge to + * @lrucare: page might be on LRU already + * + * Migrate the charge from @oldpage to @newpage. + * + * Both pages must be locked, @newpage->mapping must be set up. + */ + void mem_cgroup_migrate(struct page *oldpage, struct page *newpage, + bool lrucare) + { + unsigned int nr_pages = 1; + struct page_cgroup *pc; + + VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); + VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); + VM_BUG_ON_PAGE(PageLRU(oldpage), oldpage); + VM_BUG_ON_PAGE(PageLRU(newpage), newpage); + VM_BUG_ON_PAGE(PageAnon(oldpage) != PageAnon(newpage), newpage); + + if (mem_cgroup_disabled()) + return; + + pc = lookup_page_cgroup(oldpage); + if (!PageCgroupUsed(pc)) + return; + + VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage); + VM_BUG_ON_PAGE(do_swap_account && !(pc->flags & PCG_MEMSW), oldpage); + pc->flags &= ~(PCG_MEM | PCG_MEMSW); + + if (PageTransHuge(oldpage)) { + nr_pages <<= compound_order(oldpage); + VM_BUG_ON_PAGE(!PageTransHuge(oldpage), oldpage); + VM_BUG_ON_PAGE(!PageTransHuge(newpage), newpage); + } + + commit_charge(newpage, pc->mem_cgroup, nr_pages, lrucare); + } + /* * subsys_initcall() for memory controller. * diff --combined mm/slub.c index 8c24a23,9efabba..6641a8f --- a/mm/slub.c +++ b/mm/slub.c @@@ -233,11 -233,6 +233,6 @@@ static inline void stat(const struct km * Core slab cache functions *******************************************************************/
- static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) - { - return s->node[node]; - } - /* Verify that a pointer has an address that is valid within a slab page */ static inline int check_valid_pointer(struct kmem_cache *s, struct page *page, const void *object) @@@ -382,9 -377,9 +377,9 @@@ static inline bool __cmpxchg_double_sla defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) if (s->flags & __CMPXCHG_DOUBLE) { if (cmpxchg_double(&page->freelist, &page->counters, - freelist_old, counters_old, - freelist_new, counters_new)) - return 1; + freelist_old, counters_old, + freelist_new, counters_new)) + return 1; } else #endif { @@@ -418,9 -413,9 +413,9 @@@ static inline bool cmpxchg_double_slab( defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) if (s->flags & __CMPXCHG_DOUBLE) { if (cmpxchg_double(&page->freelist, &page->counters, - freelist_old, counters_old, - freelist_new, counters_new)) - return 1; + freelist_old, counters_old, + freelist_new, counters_new)) + return 1; } else #endif { @@@ -945,60 -940,6 +940,6 @@@ static void trace(struct kmem_cache *s }
/* - * Hooks for other subsystems that check memory allocations. In a typical - * production configuration these hooks all should produce no code at all. - */ - static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) - { - kmemleak_alloc(ptr, size, 1, flags); - } - - static inline void kfree_hook(const void *x) - { - kmemleak_free(x); - } - - static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) - { - flags &= gfp_allowed_mask; - lockdep_trace_alloc(flags); - might_sleep_if(flags & __GFP_WAIT); - - return should_failslab(s->object_size, flags, s->flags); - } - - static inline void slab_post_alloc_hook(struct kmem_cache *s, - gfp_t flags, void *object) - { - flags &= gfp_allowed_mask; - kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); - kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags); - } - - static inline void slab_free_hook(struct kmem_cache *s, void *x) - { - kmemleak_free_recursive(x, s->flags); - - /* - * Trouble is that we may no longer disable interrupts in the fast path - * So in order to make the debug calls that expect irqs to be - * disabled we need to disable interrupts temporarily. - */ - #if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP) - { - unsigned long flags; - - local_irq_save(flags); - kmemcheck_slab_free(s, x, s->object_size); - debug_check_no_locks_freed(x, s->object_size); - local_irq_restore(flags); - } - #endif - if (!(s->flags & SLAB_DEBUG_OBJECTS)) - debug_check_no_obj_freed(x, s->object_size); - } - - /* * Tracking of fully allocated slabs for debugging purposes. */ static void add_full(struct kmem_cache *s, @@@ -1282,6 -1223,12 +1223,12 @@@ static inline void inc_slabs_node(struc static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) {}
+ #endif /* CONFIG_SLUB_DEBUG */ + + /* + * Hooks for other subsystems that check memory allocations. In a typical + * production configuration these hooks all should produce no code at all. + */ static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) { kmemleak_alloc(ptr, size, 1, flags); @@@ -1293,21 -1240,44 +1240,44 @@@ static inline void kfree_hook(const voi }
static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) - { return 0; } + { + flags &= gfp_allowed_mask; + lockdep_trace_alloc(flags); + might_sleep_if(flags & __GFP_WAIT); + + return should_failslab(s->object_size, flags, s->flags); + }
- static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, - void *object) + static inline void slab_post_alloc_hook(struct kmem_cache *s, + gfp_t flags, void *object) { - kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, - flags & gfp_allowed_mask); + flags &= gfp_allowed_mask; + kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); + kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags); }
static inline void slab_free_hook(struct kmem_cache *s, void *x) { kmemleak_free_recursive(x, s->flags); - }
- #endif /* CONFIG_SLUB_DEBUG */ + /* + * Trouble is that we may no longer disable interrupts in the fast path + * So in order to make the debug calls that expect irqs to be + * disabled we need to disable interrupts temporarily. + */ + #if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP) + { + unsigned long flags; + + local_irq_save(flags); + kmemcheck_slab_free(s, x, s->object_size); + debug_check_no_locks_freed(x, s->object_size); + local_irq_restore(flags); + } + #endif + if (!(s->flags & SLAB_DEBUG_OBJECTS)) + debug_check_no_obj_freed(x, s->object_size); + }
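With the hooks hoisted out of CONFIG_SLUB_DEBUG, every allocation and free funnels through the same three choke points, and each helper collapses to a no-op when the checkers it wraps (failslab, kmemcheck, kmemleak, lockdep) are configured out. Where they sit, schematically (a sketch of the call order in slub.c, not a literal excerpt):

    /* allocation side, in slab_alloc_node(): */
    if (slab_pre_alloc_hook(s, gfpflags))   /* lockdep, might_sleep, failslab */
            return NULL;
    /* ... fast/slow path allocates object ... */
    slab_post_alloc_hook(s, gfpflags, object);      /* kmemcheck, kmemleak */

    /* free side, in slab_free(): */
    slab_free_hook(s, x);                   /* kmemleak, debug_check_no_*() */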
/* * Slab allocation and freeing @@@ -1433,7 -1403,7 +1403,7 @@@ static struct page *new_slab(struct kme memset(start, POISON_INUSE, PAGE_SIZE << order);
last = start; - for_each_object(p, s, start, page->objects) { + for_each_object(p, s, start + s->size, page->objects - 1) { setup_object(s, page, last); set_freepointer(s, last, p); last = p; @@@ -2064,6 -2034,14 +2034,14 @@@ static void put_cpu_partial(struct kmem
} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage); + + if (memcg_cache_dead(s)) { + unsigned long flags; + + local_irq_save(flags); + unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); + local_irq_restore(flags); + } #endif }
@@@ -2162,6 -2140,7 +2140,7 @@@ slab_out_of_memory(struct kmem_cache *s static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); int node; + struct kmem_cache_node *n;
if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs)) return; @@@ -2176,15 -2155,11 +2155,11 @@@ pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n", s->name);
- for_each_online_node(node) { - struct kmem_cache_node *n = get_node(s, node); + for_each_kmem_cache_node(s, node, n) { unsigned long nr_slabs; unsigned long nr_objs; unsigned long nr_free;
- if (!n) - continue; - nr_free = count_partial(n, count_free); nr_slabs = node_nr_slabs(n); nr_objs = node_nr_objs(n); @@@ -2673,18 -2648,17 +2648,17 @@@ static __always_inline void slab_free(s
@@@ -2673,18 -2648,17 +2648,17 @@@ static __always_inline void slab_free(s
	slab_free_hook(s, x);

- redo:
	/*
- 	 * Determine the currently cpus per cpu slab.
- 	 * The cpu may change afterward. However that does not matter since
- 	 * data is retrieved via this pointer. If we are on the same cpu
- 	 * during the cmpxchg then the free will succedd.
+ 	 * We could make this function fully preemptable, but then we wouldn't
+ 	 * have a method to wait for all currently executing kfree's to finish,
+ 	 * which is necessary to avoid use-after-free on per memcg cache
+ 	 * destruction.
	 */
	preempt_disable();
+ redo:
	c = this_cpu_ptr(s->cpu_slab);

	tid = c->tid;
- 	preempt_enable();

	if (likely(page == c->page)) {
		set_freepointer(s, object, c->freelist);
@@@ -2701,6 -2675,7 +2675,7 @@@
	} else
		__slab_free(s, page, x, addr);

+ 	preempt_enable();
}
void kmem_cache_free(struct kmem_cache *s, void *x)
@@@ -2928,13 -2903,10 +2903,10 @@@ static void early_kmem_cache_node_alloc
static void free_kmem_cache_nodes(struct kmem_cache *s)
{
	int node;
+ 	struct kmem_cache_node *n;

- 	for_each_node_state(node, N_NORMAL_MEMORY) {
- 		struct kmem_cache_node *n = s->node[node];
-
- 		if (n)
- 			kmem_cache_free(kmem_cache_node, n);
-
+ 	for_each_kmem_cache_node(s, node, n) {
+ 		kmem_cache_free(kmem_cache_node, n);
		s->node[node] = NULL;
	}
}
@@@ -3199,13 -3171,12 +3171,13 @@@ static void list_slab_objects(struct km
/*
 * Attempt to free all partial slabs on a node.
 * This is called from kmem_cache_close(). We must be the last thread
- * using the cache and therefore we do not need to lock anymore.
+ * using the cache, but we still have to lock for lockdep's sake.
 */
static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
{
	struct page *page, *h;

+ 	spin_lock_irq(&n->list_lock);
	list_for_each_entry_safe(page, h, &n->partial, lru) {
		if (!page->inuse) {
			__remove_partial(n, page);
@@@ -3215,7 -3186,6 +3187,7 @@@
				"Objects remaining in %s on kmem_cache_close()");
		}
	}
+ 	spin_unlock_irq(&n->list_lock);
}

/*
@@@ -3224,12 -3194,11 +3196,11 @@@
static inline int kmem_cache_close(struct kmem_cache *s)
{
	int node;
+ 	struct kmem_cache_node *n;

	flush_all(s);
	/* Attempt to free all objects */
- 	for_each_node_state(node, N_NORMAL_MEMORY) {
- 		struct kmem_cache_node *n = get_node(s, node);
-
+ 	for_each_kmem_cache_node(s, node, n) {
		free_partial(s, n);
		if (n->nr_partial || slabs_node(s, node))
			return 1;
@@@ -3406,20 -3375,26 +3377,26 @@@ int __kmem_cache_shrink(struct kmem_cac
	struct page *page;
	struct page *t;
	int objects = oo_objects(s->max);
+ 	struct list_head empty_slabs;
	struct list_head *slabs_by_inuse =
		kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
	unsigned long flags;
- 	if (!slabs_by_inuse)
- 		return -ENOMEM;
+ 	if (memcg_cache_dead(s))
+ 		s->min_partial = 0;

- 	flush_all(s);
- 	for_each_node_state(node, N_NORMAL_MEMORY) {
- 		n = get_node(s, node);
-
- 		if (!n->nr_partial)
- 			continue;
+ 	if (!slabs_by_inuse) {
+ 		/*
+ 		 * Do not fail shrinking empty slabs if allocation of the
+ 		 * temporary array failed. Just skip the slab placement
+ 		 * optimization then.
+ 		 */
+ 		slabs_by_inuse = &empty_slabs;
+ 		objects = 1;
+ 	}

+ 	flush_all(s);
+ 	for_each_kmem_cache_node(s, node, n) {
		for (i = 0; i < objects; i++)
			INIT_LIST_HEAD(slabs_by_inuse + i);
@@@ -3432,7 -3407,9 +3409,9 @@@
		 * list_lock. page->inuse here is the upper limit.
		 */
		list_for_each_entry_safe(page, t, &n->partial, lru) {
- 			list_move(&page->lru, slabs_by_inuse + page->inuse);
+ 			if (page->inuse < objects)
+ 				list_move(&page->lru,
+ 					  slabs_by_inuse + page->inuse);
			if (!page->inuse)
				n->nr_partial--;
		}
@@@ -3451,7 -3428,8 +3430,8 @@@
			discard_slab(s, page);
	}

- 	kfree(slabs_by_inuse);
+ 	if (slabs_by_inuse != &empty_slabs)
+ 		kfree(slabs_by_inuse);
	return 0;
}
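To make the shrink changes easier to follow: the function bins partial slabs by `page->inuse`, then puts the bins back so that nearly full slabs sit toward the front of the partial list while bin 0 (completely empty slabs) becomes a candidate for `discard_slab()`. The new fallback path handles `kmalloc()` failure by keeping only a single on-stack bin (`objects = 1`), so empty slabs are still reclaimed even when the placement optimization must be skipped. A stand-alone model of the binning step, with invented data:

    #include <stdio.h>

    #define MAX_OBJECTS 4	/* objects per slab in this toy model */
    #define NR_SLABS    6

    int main(void)
    {
    	/* in-use counts of six partial "slabs" */
    	int inuse[NR_SLABS] = { 2, 0, 3, 1, 0, 2 };
    	int bins[MAX_OBJECTS][NR_SLABS];
    	int nbin[MAX_OBJECTS] = { 0 };
    	int i, b;

    	/* Bin slabs by in-use count, as the list_move() above does. */
    	for (i = 0; i < NR_SLABS; i++)
    		bins[inuse[i]][nbin[inuse[i]]++] = i;

    	/* Keep busier slabs first; bin 0 holds the empty, discardable ones. */
    	for (b = MAX_OBJECTS - 1; b >= 1; b--)
    		for (i = 0; i < nbin[b]; i++)
    			printf("keep slab %d (inuse=%d)\n", bins[b][i], b);
    	for (i = 0; i < nbin[0]; i++)
    		printf("discard slab %d (empty)\n", bins[0][i]);
    	return 0;
    }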
@@@ -3588,6 -3566,7 +3568,7 @@@ static struct kmem_cache * __init boots
{
	int node;
	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+ 	struct kmem_cache_node *n;

	memcpy(s, static_cache, kmem_cache->object_size);

@@@ -3597,19 -3576,16 +3578,16 @@@
	 * IPIs around.
	 */
	__flush_cpu_slab(s, smp_processor_id());
- 	for_each_node_state(node, N_NORMAL_MEMORY) {
- 		struct kmem_cache_node *n = get_node(s, node);
+ 	for_each_kmem_cache_node(s, node, n) {
		struct page *p;

- 		if (n) {
- 			list_for_each_entry(p, &n->partial, lru)
- 				p->slab_cache = s;
+ 		list_for_each_entry(p, &n->partial, lru)
+ 			p->slab_cache = s;

#ifdef CONFIG_SLUB_DEBUG
- 			list_for_each_entry(p, &n->full, lru)
- 				p->slab_cache = s;
+ 		list_for_each_entry(p, &n->full, lru)
+ 			p->slab_cache = s;
#endif
- 		}
	}
	list_add(&s->list, &slab_caches);
	return s;
@@@ -3962,16 -3938,14 +3940,14 @@@ static long validate_slab_cache(struct
	unsigned long count = 0;
	unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
				     sizeof(unsigned long), GFP_KERNEL);
+ 	struct kmem_cache_node *n;
	if (!map)
		return -ENOMEM;

	flush_all(s);
- 	for_each_node_state(node, N_NORMAL_MEMORY) {
- 		struct kmem_cache_node *n = get_node(s, node);
-
+ 	for_each_kmem_cache_node(s, node, n)
		count += validate_slab_node(s, n, map);
- 	}
	kfree(map);
	return count;
}
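Both `validate_slab_cache()` here and `list_locations()` below size their scratch bitmap the same way: one bit per object slot in the largest possible slab, rounded up to whole `unsigned long`s by `BITS_TO_LONGS()`. A stand-alone equivalent of that sizing calculation (a sketch, not the kernel's headers):

    #include <stdio.h>
    #include <stdlib.h>
    #include <limits.h>

    #define BITS_PER_LONG	(sizeof(long) * CHAR_BIT)
    #define BITS_TO_LONGS(nr)	(((nr) + BITS_PER_LONG - 1) / BITS_PER_LONG)

    int main(void)
    {
    	unsigned long nr_objects = 100;	/* hypothetical objects per slab */
    	unsigned long *map = calloc(BITS_TO_LONGS(nr_objects),
    				    sizeof(unsigned long));

    	if (!map)
    		return 1;
    	printf("%lu bits -> %zu longs\n", nr_objects,
    	       (size_t)BITS_TO_LONGS(nr_objects));
    	free(map);
    	return 0;
    }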
@@@ -4125,6 -4099,7 +4101,7 @@@ static int list_locations(struct kmem_c
	int node;
	unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
				     sizeof(unsigned long), GFP_KERNEL);
+ 	struct kmem_cache_node *n;

	if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
				     GFP_TEMPORARY)) {
@@@ -4134,8 -4109,7 +4111,7 @@@
	/* Push back cpu slabs */
	flush_all(s);

- 	for_each_node_state(node, N_NORMAL_MEMORY) {
- 		struct kmem_cache_node *n = get_node(s, node);
+ 	for_each_kmem_cache_node(s, node, n) {
		unsigned long flags;
		struct page *page;
@@@ -4207,7 -4181,7 +4183,7 @@@
#endif

#ifdef SLUB_RESILIENCY_TEST
- static void resiliency_test(void)
+ static void __init resiliency_test(void)
{
	u8 *p;
@@@ -4334,8 -4308,9 +4310,9 @@@ static ssize_t show_slab_objects(struc
	get_online_mems();
#ifdef CONFIG_SLUB_DEBUG
	if (flags & SO_ALL) {
- 		for_each_node_state(node, N_NORMAL_MEMORY) {
- 			struct kmem_cache_node *n = get_node(s, node);
+ 		struct kmem_cache_node *n;
+
+ 		for_each_kmem_cache_node(s, node, n) {

			if (flags & SO_TOTAL)
				x = atomic_long_read(&n->total_objects);
@@@ -4351,9 -4326,9 +4328,9 @@@
	} else
#endif
	if (flags & SO_PARTIAL) {
- 		for_each_node_state(node, N_NORMAL_MEMORY) {
- 			struct kmem_cache_node *n = get_node(s, node);
+ 		struct kmem_cache_node *n;

+ 		for_each_kmem_cache_node(s, node, n) {
			if (flags & SO_TOTAL)
				x = count_partial(n, count_total);
			else if (flags & SO_OBJECTS)
@@@ -4366,7 -4341,7 +4343,7 @@@
	}
	x = sprintf(buf, "%lu", total);
#ifdef CONFIG_NUMA
- 	for_each_node_state(node, N_NORMAL_MEMORY)
+ 	for (node = 0; node < nr_node_ids; node++)
		if (nodes[node])
			x += sprintf(buf + x, " N%d=%lu", node, nodes[node]);
@@@ -4380,16 -4355,12 +4357,12 @@@
static int any_slab_objects(struct kmem_cache *s)
{
	int node;
+ 	struct kmem_cache_node *n;

- 	for_each_online_node(node) {
- 		struct kmem_cache_node *n = get_node(s, node);
-
- 		if (!n)
- 			continue;
-
+ 	for_each_kmem_cache_node(s, node, n)
		if (atomic_long_read(&n->total_objects))
			return 1;
- 	}
+
	return 0;
}
#endif
@@@ -5344,13 -5315,9 +5317,9 @@@ void get_slabinfo(struct kmem_cache *s
	unsigned long nr_objs = 0;
	unsigned long nr_free = 0;
	int node;
+ 	struct kmem_cache_node *n;
- 	for_each_online_node(node) {
- 		struct kmem_cache_node *n = get_node(s, node);
-
- 		if (!n)
- 			continue;
-
+ 	for_each_kmem_cache_node(s, node, n) {
		nr_slabs += node_nr_slabs(n);
		nr_objs += node_nr_objs(n);
		nr_free += count_partial(n, count_free);
diff --combined net/xfrm/xfrm_policy.c
index 0525d78,92cb08d..beeed60
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@@ -389,7 -389,7 +389,7 @@@ redo
			if (h != h0)
				continue;
			hlist_del(&pol->bydst);
- 			hlist_add_after(entry0, &pol->bydst);
+ 			hlist_add_behind(&pol->bydst, entry0);
		}
		entry0 = &pol->bydst;
	}
@@@ -654,7 -654,7 +654,7 @@@ int xfrm_policy_insert(int dir, struct
			break;
	}
	if (newpos)
- 		hlist_add_after(newpos, &policy->bydst);
+ 		hlist_add_behind(&policy->bydst, newpos);
	else
		hlist_add_head(&policy->bydst, chain);
	xfrm_pol_hold(policy);
@@@ -2097,8 -2097,6 +2097,8 @@@ struct dst_entry *xfrm_lookup(struct ne
		goto no_transform;
	}

+ 		dst_hold(&xdst->u.dst);
+ 		xdst->u.dst.flags |= DST_NOCACHE;
		route = xdst->route;
	}
}
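The two xfrm hunks pick up the tree-wide rename of `hlist_add_after()` to `hlist_add_behind()`, which also swaps the argument order so the node being inserted comes first, matching `hlist_add_head()` and `hlist_add_before()`. A minimal user-space hlist demonstrating the new calling convention (the list implementation below is a simplified re-creation for illustration, not the kernel's <linux/list.h>):

    #include <stdio.h>
    #include <stddef.h>

    struct hlist_node { struct hlist_node *next, **pprev; };
    struct hlist_head { struct hlist_node *first; };

    static void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
    {
    	n->next = h->first;
    	if (h->first)
    		h->first->pprev = &n->next;
    	h->first = n;
    	n->pprev = &h->first;
    }

    /* New node first, anchor second: insert n directly after prev. */
    static void hlist_add_behind(struct hlist_node *n, struct hlist_node *prev)
    {
    	n->next = prev->next;
    	prev->next = n;
    	n->pprev = &prev->next;
    	if (n->next)
    		n->next->pprev = &n->next;
    }

    struct item { int val; struct hlist_node node; };

    int main(void)
    {
    	struct hlist_head head = { NULL };
    	struct item a = { 1 }, b = { 2 }, c = { 3 };
    	struct hlist_node *pos;

    	hlist_add_head(&a.node, &head);
    	hlist_add_behind(&c.node, &a.node);	/* list: 1 3 */
    	hlist_add_behind(&b.node, &a.node);	/* list: 1 2 3 */

    	for (pos = head.first; pos; pos = pos->next) {
    		struct item *it = (struct item *)((char *)pos -
    				offsetof(struct item, node));
    		printf("%d\n", it->val);
    	}
    	return 0;
    }

Putting the inserted node in the first argument makes all the hlist insertion helpers consistent, which is presumably why the rename touched every caller, as seen here.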