Diffstat (limited to 'lib')
102 files changed, 16418 insertions, 1343 deletions
diff --git a/lib/aarch32/cache_helpers.S b/lib/aarch32/cache_helpers.S new file mode 100644 index 00000000..810af0f0 --- /dev/null +++ b/lib/aarch32/cache_helpers.S @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch.h> +#include <asm_macros.S> + + .globl flush_dcache_range + .globl clean_dcache_range + .globl inv_dcache_range + .globl dcsw_op_louis + .globl dcsw_op_all + .globl dcsw_op_level1 + .globl dcsw_op_level2 + .globl dcsw_op_level3 + +/* + * This macro can be used for implementing various data cache operations `op` + */ +.macro do_dcache_maintenance_by_mva op, coproc, opc1, CRn, CRm, opc2 + /* Exit early if size is zero */ + cmp r1, #0 + beq exit_loop_\op + dcache_line_size r2, r3 + add r1, r0, r1 + sub r3, r2, #1 + bic r0, r0, r3 +loop_\op: + stcopr r0, \coproc, \opc1, \CRn, \CRm, \opc2 + add r0, r0, r2 + cmp r0, r1 + blo loop_\op + dsb sy +exit_loop_\op: + bx lr +.endm + + /* ------------------------------------------ + * Clean+Invalidate from base address till + * size. 'r0' = addr, 'r1' = size + * ------------------------------------------ + */ +func flush_dcache_range + do_dcache_maintenance_by_mva cimvac, DCCIMVAC +endfunc flush_dcache_range + + /* ------------------------------------------ + * Clean from base address till size. + * 'r0' = addr, 'r1' = size + * ------------------------------------------ + */ +func clean_dcache_range + do_dcache_maintenance_by_mva cmvac, DCCMVAC +endfunc clean_dcache_range + + /* ------------------------------------------ + * Invalidate from base address till + * size. 'r0' = addr, 'r1' = size + * ------------------------------------------ + */ +func inv_dcache_range + do_dcache_maintenance_by_mva imvac, DCIMVAC +endfunc inv_dcache_range + + /* ---------------------------------------------------------------- + * Data cache operations by set/way to the level specified + * + * The main function, do_dcsw_op requires: + * r0: The operation type (DC_OP_ISW, DC_OP_CISW, DC_OP_CSW), + * as defined in arch.h + * r1: The cache level to begin operation from + * r2: clidr_el1 + * r3: The last cache level to operate on + * and will carry out the operation on each data cache from level 0 + * to the level in r3 in sequence + * + * The dcsw_op macro sets up the r2 and r3 parameters based on + * clidr_el1 cache information before invoking the main function + * ---------------------------------------------------------------- + */ + + .macro dcsw_op shift, fw, ls + ldcopr r2, CLIDR + ubfx r3, r2, \shift, \fw + lsl r3, r3, \ls + mov r1, #0 + b do_dcsw_op + .endm + +func do_dcsw_op + push {r4-r12,lr} + adr r11, dcsw_loop_table // compute cache op based on the operation type + add r6, r11, r0, lsl #3 // cache op is 2x32-bit instructions +loop1: + add r10, r1, r1, LSR #1 // Work out 3x current cache level + mov r12, r2, LSR r10 // extract cache type bits from clidr + and r12, r12, #7 // mask the bits for current cache only + cmp r12, #2 // see what cache we have at this level + blo level_done // no cache or only instruction cache at this level + + stcopr r1, CSSELR // select current cache level in csselr + isb // isb to sych the new cssr&csidr + ldcopr r12, CCSIDR // read the new ccsidr + and r10, r12, #7 // extract the length of the cache lines + add r10, r10, #4 // add 4 (r10 = line length offset) + ubfx r4, r12, #3, #10 // r4 = maximum way number (right aligned) + clz r5, r4 // r5 = the bit position of the way size increment + mov r9, r4 // r9 
working copy of the aligned max way number + +loop2: + ubfx r7, r12, #13, #15 // r7 = max set number (right aligned) + +loop3: + orr r0, r1, r9, LSL r5 // factor in the way number and cache level into r0 + orr r0, r0, r7, LSL r10 // factor in the set number + + blx r6 + subs r7, r7, #1 // decrement the set number + bhs loop3 + subs r9, r9, #1 // decrement the way number + bhs loop2 +level_done: + add r1, r1, #2 // increment the cache number + cmp r3, r1 + dsb sy // ensure completion of previous cache maintenance instruction + bhi loop1 + + mov r6, #0 + stcopr r6, CSSELR //select cache level 0 in csselr + dsb sy + isb + pop {r4-r12,pc} + +dcsw_loop_table: + stcopr r0, DCISW + bx lr + stcopr r0, DCCISW + bx lr + stcopr r0, DCCSW + bx lr + +endfunc do_dcsw_op + + /* --------------------------------------------------------------- + * Data cache operations by set/way till PoU. + * + * The function requires : + * r0: The operation type (DC_OP_ISW, DC_OP_CISW, DC_OP_CSW), + * as defined in arch.h + * --------------------------------------------------------------- + */ +func dcsw_op_louis + dcsw_op #LOUIS_SHIFT, #CLIDR_FIELD_WIDTH, #LEVEL_SHIFT +endfunc dcsw_op_louis + + /* --------------------------------------------------------------- + * Data cache operations by set/way till PoC. + * + * The function requires : + * r0: The operation type (DC_OP_ISW, DC_OP_CISW, DC_OP_CSW), + * as defined in arch.h + * --------------------------------------------------------------- + */ +func dcsw_op_all + dcsw_op #LOC_SHIFT, #CLIDR_FIELD_WIDTH, #LEVEL_SHIFT +endfunc dcsw_op_all + + + /* --------------------------------------------------------------- + * Helper macro for data cache operations by set/way for the + * level specified + * --------------------------------------------------------------- + */ + .macro dcsw_op_level level + ldcopr r2, CLIDR + mov r3, \level + sub r1, r3, #2 + b do_dcsw_op + .endm + + /* --------------------------------------------------------------- + * Data cache operations by set/way for level 1 cache + * + * The main function, do_dcsw_op requires: + * r0: The operation type (DC_OP_ISW, DC_OP_CISW, DC_OP_CSW), + * as defined in arch.h + * --------------------------------------------------------------- + */ +func dcsw_op_level1 + dcsw_op_level #(1 << LEVEL_SHIFT) +endfunc dcsw_op_level1 + + /* --------------------------------------------------------------- + * Data cache operations by set/way for level 2 cache + * + * The main function, do_dcsw_op requires: + * r0: The operation type (DC_OP_ISW, DC_OP_CISW, DC_OP_CSW), + * as defined in arch.h + * --------------------------------------------------------------- + */ +func dcsw_op_level2 + dcsw_op_level #(2 << LEVEL_SHIFT) +endfunc dcsw_op_level2 + + /* --------------------------------------------------------------- + * Data cache operations by set/way for level 3 cache + * + * The main function, do_dcsw_op requires: + * r0: The operation type (DC_OP_ISW, DC_OP_CISW, DC_OP_CSW), + * as defined in arch.h + * --------------------------------------------------------------- + */ +func dcsw_op_level3 + dcsw_op_level #(3 << LEVEL_SHIFT) +endfunc dcsw_op_level3 diff --git a/lib/aarch32/misc_helpers.S b/lib/aarch32/misc_helpers.S new file mode 100644 index 00000000..77cf6cd9 --- /dev/null +++ b/lib/aarch32/misc_helpers.S @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch.h> +#include <asm_macros.S> +#include <assert_macros.S> + + .globl smc + .globl zeromem + .globl zero_normalmem + .globl memcpy4 + .globl disable_mmu_icache_secure + .globl disable_mmu_secure + +func smc + /* + * For AArch32 only r0-r3 will be in the registers; + * rest r4-r6 will be pushed on to the stack. So here, we'll + * have to load them from the stack to registers r4-r6 explicitly. + * Clobbers: r4-r6 + */ + ldm sp, {r4, r5, r6} + smc #0 +endfunc smc + +/* ----------------------------------------------------------------------- + * void zeromem(void *mem, unsigned int length) + * + * Initialise a region in normal memory to 0. This functions complies with the + * AAPCS and can be called from C code. + * + * ----------------------------------------------------------------------- + */ +func zeromem + /* + * Readable names for registers + * + * Registers r0, r1 and r2 are also set by zeromem which + * branches into the fallback path directly, so cursor, length and + * stop_address should not be retargeted to other registers. + */ + cursor .req r0 /* Start address and then current address */ + length .req r1 /* Length in bytes of the region to zero out */ + /* + * Reusing the r1 register as length is only used at the beginning of + * the function. + */ + stop_address .req r1 /* Address past the last zeroed byte */ + zeroreg1 .req r2 /* Source register filled with 0 */ + zeroreg2 .req r3 /* Source register filled with 0 */ + tmp .req r12 /* Temporary scratch register */ + + mov zeroreg1, #0 + + /* stop_address is the address past the last to zero */ + add stop_address, cursor, length + + /* + * Length cannot be used anymore as it shares the same register with + * stop_address. + */ + .unreq length + + /* + * If the start address is already aligned to 8 bytes, skip this loop. + */ + tst cursor, #(8-1) + beq .Lzeromem_8bytes_aligned + + /* Calculate the next address aligned to 8 bytes */ + orr tmp, cursor, #(8-1) + adds tmp, tmp, #1 + /* If it overflows, fallback to byte per byte zeroing */ + beq .Lzeromem_1byte_aligned + /* If the next aligned address is after the stop address, fall back */ + cmp tmp, stop_address + bhs .Lzeromem_1byte_aligned + + /* zero byte per byte */ +1: + strb zeroreg1, [cursor], #1 + cmp cursor, tmp + bne 1b + + /* zero 8 bytes at a time */ +.Lzeromem_8bytes_aligned: + + /* Calculate the last 8 bytes aligned address. */ + bic tmp, stop_address, #(8-1) + + cmp cursor, tmp + bhs 2f + + mov zeroreg2, #0 +1: + stmia cursor!, {zeroreg1, zeroreg2} + cmp cursor, tmp + blo 1b +2: + + /* zero byte per byte */ +.Lzeromem_1byte_aligned: + cmp cursor, stop_address + beq 2f +1: + strb zeroreg1, [cursor], #1 + cmp cursor, stop_address + bne 1b +2: + bx lr + + .unreq cursor + /* + * length is already unreq'ed to reuse the register for another + * variable. + */ + .unreq stop_address + .unreq zeroreg1 + .unreq zeroreg2 + .unreq tmp +endfunc zeromem + +/* + * AArch32 does not have special ways of zeroing normal memory as AArch64 does + * using the DC ZVA instruction, so we just alias zero_normalmem to zeromem. + */ +.equ zero_normalmem, zeromem + +/* -------------------------------------------------------------------------- + * void memcpy4(void *dest, const void *src, unsigned int length) + * + * Copy length bytes from memory area src to memory area dest. + * The memory areas should not overlap. + * Destination and source addresses must be 4-byte aligned. 
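+ *
+ * For illustration, the assembly below behaves like this C sketch (a
+ * reference model only, assuming the 4-byte alignment precondition holds;
+ * memcpy4_ref is a hypothetical name, not part of this diff):
+ *
+ *   void memcpy4_ref(void *dest, const void *src, unsigned int length)
+ *   {
+ *       unsigned int *d = dest;            // m_loop4: 4 bytes at a time
+ *       const unsigned int *s = src;
+ *       while (length >= 4) {
+ *           *d++ = *s++;
+ *           length -= 4;
+ *       }
+ *       unsigned char *db = (unsigned char *)d;    // m_loop1: tail bytes
+ *       const unsigned char *sb = (const unsigned char *)s;
+ *       while (length--)
+ *           *db++ = *sb++;
+ *   }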
+ * -------------------------------------------------------------------------- + */ +func memcpy4 +#if ENABLE_ASSERTIONS + orr r3, r0, r1 + tst r3, #0x3 + ASM_ASSERT(eq) +#endif +/* copy 4 bytes at a time */ +m_loop4: + cmp r2, #4 + blo m_loop1 + ldr r3, [r1], #4 + str r3, [r0], #4 + sub r2, r2, #4 + b m_loop4 +/* copy byte per byte */ +m_loop1: + cmp r2,#0 + beq m_end + ldrb r3, [r1], #1 + strb r3, [r0], #1 + subs r2, r2, #1 + bne m_loop1 +m_end: + bx lr +endfunc memcpy4 + +/* --------------------------------------------------------------------------- + * Disable the MMU in Secure State + * --------------------------------------------------------------------------- + */ + +func disable_mmu_secure + mov r1, #(SCTLR_M_BIT | SCTLR_C_BIT) +do_disable_mmu: + ldcopr r0, SCTLR + bic r0, r0, r1 + stcopr r0, SCTLR + isb // ensure MMU is off + dsb sy + bx lr +endfunc disable_mmu_secure + + +func disable_mmu_icache_secure + ldr r1, =(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT) + b do_disable_mmu +endfunc disable_mmu_icache_secure diff --git a/lib/aarch64/cache_helpers.S b/lib/aarch64/cache_helpers.S index dc601021..9c40b9db 100644 --- a/lib/aarch64/cache_helpers.S +++ b/lib/aarch64/cache_helpers.S @@ -1,37 +1,14 @@ /* * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ #include <arch.h> #include <asm_macros.S> .globl flush_dcache_range + .globl clean_dcache_range .globl inv_dcache_range .globl dcsw_op_louis .globl dcsw_op_all @@ -39,24 +16,42 @@ .globl dcsw_op_level2 .globl dcsw_op_level3 - /* ------------------------------------------ - * Clean+Invalidate from base address till - * size. 
'x0' = addr, 'x1' = size - * ------------------------------------------ - */ -func flush_dcache_range +/* + * This macro can be used for implementing various data cache operations `op` + */ +.macro do_dcache_maintenance_by_mva op + /* Exit early if size is zero */ + cbz x1, exit_loop_\op dcache_line_size x2, x3 add x1, x0, x1 sub x3, x2, #1 bic x0, x0, x3 -flush_loop: - dc civac, x0 +loop_\op: + dc \op, x0 add x0, x0, x2 cmp x0, x1 - b.lo flush_loop + b.lo loop_\op dsb sy +exit_loop_\op: ret +.endm + /* ------------------------------------------ + * Clean+Invalidate from base address till + * size. 'x0' = addr, 'x1' = size + * ------------------------------------------ + */ +func flush_dcache_range + do_dcache_maintenance_by_mva civac +endfunc flush_dcache_range + /* ------------------------------------------ + * Clean from base address till size. + * 'x0' = addr, 'x1' = size + * ------------------------------------------ + */ +func clean_dcache_range + do_dcache_maintenance_by_mva cvac +endfunc clean_dcache_range /* ------------------------------------------ * Invalidate from base address till @@ -64,17 +59,8 @@ flush_loop: * ------------------------------------------ */ func inv_dcache_range - dcache_line_size x2, x3 - add x1, x0, x1 - sub x3, x2, #1 - bic x0, x0, x3 -inv_loop: - dc ivac, x0 - add x0, x0, x2 - cmp x0, x1 - b.lo inv_loop - dsb sy - ret + do_dcache_maintenance_by_mva ivac +endfunc inv_dcache_range /* --------------------------------------------------------------- @@ -112,7 +98,7 @@ loop1: lsr x1, x0, x2 // extract cache type bits from clidr and x1, x1, #7 // mask the bits for current cache only cmp x1, #2 // see what cache we have at this level - b.lt level_done // nothing to do if no cache or icache + b.lo level_done // nothing to do if no cache or icache msr csselr_el1, x10 // select current cache level in csselr isb // isb to sych the new cssr&csidr @@ -137,10 +123,10 @@ loop3_\_op: orr w11, w9, w7 // combine cache, way and set number dc \_op, x11 subs w7, w7, w17 // decrement set number - b.ge loop3_\_op + b.hs loop3_\_op subs x9, x9, x16 // decrement way number - b.ge loop2_\_op + b.hs loop2_\_op b level_done .endm @@ -148,12 +134,13 @@ loop3_\_op: level_done: add x10, x10, #2 // increment cache number cmp x3, x10 - b.gt loop1 + b.hi loop1 msr csselr_el1, xzr // select cache level 0 in csselr dsb sy // barrier to complete final cache operation isb exit: ret +endfunc do_dcsw_op dcsw_loop_table: dcsw_loop isw @@ -163,10 +150,12 @@ dcsw_loop_table: func dcsw_op_louis dcsw_op #LOUIS_SHIFT, #CLIDR_FIELD_WIDTH, #LEVEL_SHIFT +endfunc dcsw_op_louis func dcsw_op_all dcsw_op #LOC_SHIFT, #CLIDR_FIELD_WIDTH, #LEVEL_SHIFT +endfunc dcsw_op_all /* --------------------------------------------------------------- * Helper macro for data cache operations by set/way for the @@ -189,6 +178,7 @@ func dcsw_op_all */ func dcsw_op_level1 dcsw_op_level #(1 << LEVEL_SHIFT) +endfunc dcsw_op_level1 /* --------------------------------------------------------------- * Data cache operations by set/way for level 2 cache @@ -199,6 +189,7 @@ func dcsw_op_level1 */ func dcsw_op_level2 dcsw_op_level #(2 << LEVEL_SHIFT) +endfunc dcsw_op_level2 /* --------------------------------------------------------------- * Data cache operations by set/way for level 3 cache @@ -209,3 +200,4 @@ func dcsw_op_level2 */ func dcsw_op_level3 dcsw_op_level #(3 << LEVEL_SHIFT) +endfunc dcsw_op_level3 diff --git a/lib/aarch64/misc_helpers.S b/lib/aarch64/misc_helpers.S index f605bf40..9dfe46a2 100644 --- 
a/lib/aarch64/misc_helpers.S +++ b/lib/aarch64/misc_helpers.S @@ -1,31 +1,7 @@ /* - * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ #include <arch.h> @@ -37,10 +13,14 @@ .globl eret .globl smc + .globl zero_normalmem + .globl zeromem .globl zeromem16 .globl memcpy16 + .globl disable_mmu_el1 .globl disable_mmu_el3 + .globl disable_mmu_icache_el1 .globl disable_mmu_icache_el3 #if SUPPORT_VFP @@ -53,6 +33,7 @@ func get_afflvl_shift mov x1, #MPIDR_AFFLVL_SHIFT lsl x0, x0, x1 ret +endfunc get_afflvl_shift func mpidr_mask_lower_afflvls cmp x1, #3 @@ -62,43 +43,375 @@ func mpidr_mask_lower_afflvls lsr x0, x0, x2 lsl x0, x0, x2 ret +endfunc mpidr_mask_lower_afflvls func eret eret +endfunc eret func smc smc #0 +endfunc smc /* ----------------------------------------------------------------------- * void zeromem16(void *mem, unsigned int length); * * Initialise a memory region to 0. * The memory address must be 16-byte aligned. + * NOTE: This function is deprecated and zeromem should be used instead. * ----------------------------------------------------------------------- */ -func zeromem16 -#if ASM_ASSERTION - tst x0, #0xf - ASM_ASSERT(eq) -#endif +.equ zeromem16, zeromem + +/* ----------------------------------------------------------------------- + * void zero_normalmem(void *mem, unsigned int length); + * + * Initialise a region in normal memory to 0. This functions complies with the + * AAPCS and can be called from C code. + * + * NOTE: MMU must be enabled when using this function as it can only operate on + * normal memory. It is intended to be mainly used from C code when MMU + * is usually enabled. 
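+ *
+ * Typical use from C (illustrative only; buf is a hypothetical buffer in
+ * normal, cached memory):
+ *
+ *   static unsigned char buf[256];
+ *
+ *   zero_normalmem(buf, sizeof(buf));  // MMU on: fast DC ZVA based path
+ *   zeromem(buf, sizeof(buf));         // also safe with the MMU disabled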
+ * ----------------------------------------------------------------------- + */ +.equ zero_normalmem, zeromem_dczva + +/* ----------------------------------------------------------------------- + * void zeromem(void *mem, unsigned int length); + * + * Initialise a region of device memory to 0. This functions complies with the + * AAPCS and can be called from C code. + * + * NOTE: When data caches and MMU are enabled, zero_normalmem can usually be + * used instead for faster zeroing. + * + * ----------------------------------------------------------------------- + */ +func zeromem + /* x2 is the address past the last zeroed address */ add x2, x0, x1 -/* zero 16 bytes at a time */ -z_loop16: - sub x3, x2, x0 - cmp x3, #16 - b.lt z_loop1 - stp xzr, xzr, [x0], #16 - b z_loop16 -/* zero byte per byte */ -z_loop1: - cmp x0, x2 - b.eq z_end - strb wzr, [x0], #1 - b z_loop1 -z_end: ret + /* + * Uses the fallback path that does not use DC ZVA instruction and + * therefore does not need enabled MMU + */ + b .Lzeromem_dczva_fallback_entry +endfunc zeromem + +/* ----------------------------------------------------------------------- + * void zeromem_dczva(void *mem, unsigned int length); + * + * Fill a region of normal memory of size "length" in bytes with null bytes. + * MMU must be enabled and the memory be of + * normal type. This is because this function internally uses the DC ZVA + * instruction, which generates an Alignment fault if used on any type of + * Device memory (see section D3.4.9 of the ARMv8 ARM, issue k). When the MMU + * is disabled, all memory behaves like Device-nGnRnE memory (see section + * D4.2.8), hence the requirement on the MMU being enabled. + * NOTE: The code assumes that the block size as defined in DCZID_EL0 + * register is at least 16 bytes. + * + * ----------------------------------------------------------------------- + */ +func zeromem_dczva + + /* + * The function consists of a series of loops that zero memory one byte + * at a time, 16 bytes at a time or using the DC ZVA instruction to + * zero aligned block of bytes, which is assumed to be more than 16. + * In the case where the DC ZVA instruction cannot be used or if the + * first 16 bytes loop would overflow, there is fallback path that does + * not use DC ZVA. + * Note: The fallback path is also used by the zeromem function that + * branches to it directly. 
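+ *
+ * As a worked example of the DCZID_EL0 decoding performed below
+ * (illustrative): if DCZID_EL0.BS reads 4, i.e. log2(block size in
+ * words), the block size is (1 << 2) << 4 = 64 bytes, a common value on
+ * ARMv8 implementations.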
+ * + * +---------+ zeromem_dczva + * | entry | + * +----+----+ + * | + * v + * +---------+ + * | checks |>o-------+ (If any check fails, fallback) + * +----+----+ | + * | |---------------+ + * v | Fallback path | + * +------+------+ |---------------+ + * | 1 byte loop | | + * +------+------+ .Lzeromem_dczva_initial_1byte_aligned_end + * | | + * v | + * +-------+-------+ | + * | 16 bytes loop | | + * +-------+-------+ | + * | | + * v | + * +------+------+ .Lzeromem_dczva_blocksize_aligned + * | DC ZVA loop | | + * +------+------+ | + * +--------+ | | + * | | | | + * | v v | + * | +-------+-------+ .Lzeromem_dczva_final_16bytes_aligned + * | | 16 bytes loop | | + * | +-------+-------+ | + * | | | + * | v | + * | +------+------+ .Lzeromem_dczva_final_1byte_aligned + * | | 1 byte loop | | + * | +-------------+ | + * | | | + * | v | + * | +---+--+ | + * | | exit | | + * | +------+ | + * | | + * | +--------------+ +------------------+ zeromem + * | | +----------------| zeromem function | + * | | | +------------------+ + * | v v + * | +-------------+ .Lzeromem_dczva_fallback_entry + * | | 1 byte loop | + * | +------+------+ + * | | + * +-----------+ + */ + + /* + * Readable names for registers + * + * Registers x0, x1 and x2 are also set by zeromem which + * branches into the fallback path directly, so cursor, length and + * stop_address should not be retargeted to other registers. + */ + cursor .req x0 /* Start address and then current address */ + length .req x1 /* Length in bytes of the region to zero out */ + /* Reusing x1 as length is never used after block_mask is set */ + block_mask .req x1 /* Bitmask of the block size read in DCZID_EL0 */ + stop_address .req x2 /* Address past the last zeroed byte */ + block_size .req x3 /* Size of a block in bytes as read in DCZID_EL0 */ + tmp1 .req x4 + tmp2 .req x5 + +#if ENABLE_ASSERTIONS + /* + * Check for M bit (MMU enabled) of the current SCTLR_EL(1|3) + * register value and panic if the MMU is disabled. + */ +#if defined(IMAGE_BL1) || defined(IMAGE_BL31) + mrs tmp1, sctlr_el3 +#else + mrs tmp1, sctlr_el1 +#endif + + tst tmp1, #SCTLR_M_BIT + ASM_ASSERT(ne) +#endif /* ENABLE_ASSERTIONS */ + + /* stop_address is the address past the last to zero */ + add stop_address, cursor, length + + /* + * Get block_size = (log2(<block size>) >> 2) (see encoding of + * dczid_el0 reg) + */ + mrs block_size, dczid_el0 + + /* + * Select the 4 lowest bits and convert the extracted log2(<block size + * in words>) to <block size in bytes> + */ + ubfx block_size, block_size, #0, #4 + mov tmp2, #(1 << 2) + lsl block_size, tmp2, block_size + +#if ENABLE_ASSERTIONS + /* + * Assumes block size is at least 16 bytes to avoid manual realignment + * of the cursor at the end of the DCZVA loop. + */ + cmp block_size, #16 + ASM_ASSERT(hs) +#endif + /* + * Not worth doing all the setup for a region less than a block and + * protects against zeroing a whole block when the area to zero is + * smaller than that. Also, as it is assumed that the block size is at + * least 16 bytes, this also protects the initial aligning loops from + * trying to zero 16 bytes when length is less than 16. + */ + cmp length, block_size + b.lo .Lzeromem_dczva_fallback_entry + + /* + * Calculate the bitmask of the block alignment. It will never + * underflow as the block size is between 4 bytes and 2kB. + * block_mask = block_size - 1 + */ + sub block_mask, block_size, #1 + + /* + * length alias should not be used after this point unless it is + * defined as a register other than block_mask's. 
+ */ + .unreq length + + /* + * If the start address is already aligned to zero block size, go + * straight to the cache zeroing loop. This is safe because at this + * point, the length cannot be smaller than a block size. + */ + tst cursor, block_mask + b.eq .Lzeromem_dczva_blocksize_aligned + + /* + * Calculate the first block-size-aligned address. It is assumed that + * the zero block size is at least 16 bytes. This address is the last + * address of this initial loop. + */ + orr tmp1, cursor, block_mask + add tmp1, tmp1, #1 + + /* + * If the addition overflows, skip the cache zeroing loops. This is + * quite unlikely however. + */ + cbz tmp1, .Lzeromem_dczva_fallback_entry + + /* + * If the first block-size-aligned address is past the last address, + * fallback to the simpler code. + */ + cmp tmp1, stop_address + b.hi .Lzeromem_dczva_fallback_entry + + /* + * If the start address is already aligned to 16 bytes, skip this loop. + * It is safe to do this because tmp1 (the stop address of the initial + * 16 bytes loop) will never be greater than the final stop address. + */ + tst cursor, #0xf + b.eq .Lzeromem_dczva_initial_1byte_aligned_end + + /* Calculate the next address aligned to 16 bytes */ + orr tmp2, cursor, #0xf + add tmp2, tmp2, #1 + /* If it overflows, fallback to the simple path (unlikely) */ + cbz tmp2, .Lzeromem_dczva_fallback_entry + /* + * Next aligned address cannot be after the stop address because the + * length cannot be smaller than 16 at this point. + */ + + /* First loop: zero byte per byte */ +1: + strb wzr, [cursor], #1 + cmp cursor, tmp2 + b.ne 1b +.Lzeromem_dczva_initial_1byte_aligned_end: + + /* + * Second loop: we need to zero 16 bytes at a time from cursor to tmp1 + * before being able to use the code that deals with block-size-aligned + * addresses. + */ + cmp cursor, tmp1 + b.hs 2f +1: + stp xzr, xzr, [cursor], #16 + cmp cursor, tmp1 + b.lo 1b +2: + + /* + * Third loop: zero a block at a time using DC ZVA cache block zeroing + * instruction. + */ +.Lzeromem_dczva_blocksize_aligned: + /* + * Calculate the last block-size-aligned address. If the result equals + * to the start address, the loop will exit immediately. + */ + bic tmp1, stop_address, block_mask + + cmp cursor, tmp1 + b.hs 2f +1: + /* Zero the block containing the cursor */ + dc zva, cursor + /* Increment the cursor by the size of a block */ + add cursor, cursor, block_size + cmp cursor, tmp1 + b.lo 1b +2: + + /* + * Fourth loop: zero 16 bytes at a time and then byte per byte the + * remaining area + */ +.Lzeromem_dczva_final_16bytes_aligned: + /* + * Calculate the last 16 bytes aligned address. It is assumed that the + * block size will never be smaller than 16 bytes so that the current + * cursor is aligned to at least 16 bytes boundary. + */ + bic tmp1, stop_address, #15 + + cmp cursor, tmp1 + b.hs 2f +1: + stp xzr, xzr, [cursor], #16 + cmp cursor, tmp1 + b.lo 1b +2: + + /* Fifth and final loop: zero byte per byte */ +.Lzeromem_dczva_final_1byte_aligned: + cmp cursor, stop_address + b.eq 2f +1: + strb wzr, [cursor], #1 + cmp cursor, stop_address + b.ne 1b +2: + ret + /* Fallback for unaligned start addresses */ +.Lzeromem_dczva_fallback_entry: + /* + * If the start address is already aligned to 16 bytes, skip this loop. 
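+ * Otherwise, the code below computes the next 16-byte boundary as
+ * (cursor | 15) + 1; for example, a cursor of 0x1003 yields
+ * (0x1003 | 0xf) + 1 = 0x1010.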
+ */ + tst cursor, #0xf + b.eq .Lzeromem_dczva_final_16bytes_aligned + + /* Calculate the next address aligned to 16 bytes */ + orr tmp1, cursor, #15 + add tmp1, tmp1, #1 + /* If it overflows, fallback to byte per byte zeroing */ + cbz tmp1, .Lzeromem_dczva_final_1byte_aligned + /* If the next aligned address is after the stop address, fall back */ + cmp tmp1, stop_address + b.hs .Lzeromem_dczva_final_1byte_aligned + + /* Fallback entry loop: zero byte per byte */ +1: + strb wzr, [cursor], #1 + cmp cursor, tmp1 + b.ne 1b + + b .Lzeromem_dczva_final_16bytes_aligned + + .unreq cursor + /* + * length is already unreq'ed to reuse the register for another + * variable. + */ + .unreq stop_address + .unreq block_size + .unreq block_mask + .unreq tmp1 + .unreq tmp2 +endfunc zeromem_dczva /* -------------------------------------------------------------------------- * void memcpy16(void *dest, const void *src, unsigned int length) @@ -109,7 +422,7 @@ z_end: ret * -------------------------------------------------------------------------- */ func memcpy16 -#if ASM_ASSERTION +#if ENABLE_ASSERTIONS orr x3, x0, x1 tst x3, #0xf ASM_ASSERT(eq) @@ -117,7 +430,7 @@ func memcpy16 /* copy 16 bytes at a time */ m_loop16: cmp x2, #16 - b.lt m_loop1 + b.lo m_loop1 ldp x3, x4, [x1], #16 stp x3, x4, [x0], #16 sub x2, x2, #16 @@ -129,30 +442,53 @@ m_loop1: strb w3, [x0], #1 subs x2, x2, #1 b.ne m_loop1 -m_end: ret +m_end: + ret +endfunc memcpy16 /* --------------------------------------------------------------------------- * Disable the MMU at EL3 - * This is implemented in assembler to ensure that the data cache is cleaned - * and invalidated after the MMU is disabled without any intervening cacheable - * data accesses * --------------------------------------------------------------------------- */ func disable_mmu_el3 mov x1, #(SCTLR_M_BIT | SCTLR_C_BIT) -do_disable_mmu: +do_disable_mmu_el3: mrs x0, sctlr_el3 bic x0, x0, x1 msr sctlr_el3, x0 - isb // ensure MMU is off - mov x0, #DCCISW // DCache clean and invalidate - b dcsw_op_all + isb /* ensure MMU is off */ + dsb sy + ret +endfunc disable_mmu_el3 func disable_mmu_icache_el3 mov x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT) - b do_disable_mmu + b do_disable_mmu_el3 +endfunc disable_mmu_icache_el3 + +/* --------------------------------------------------------------------------- + * Disable the MMU at EL1 + * --------------------------------------------------------------------------- + */ + +func disable_mmu_el1 + mov x1, #(SCTLR_M_BIT | SCTLR_C_BIT) +do_disable_mmu_el1: + mrs x0, sctlr_el1 + bic x0, x0, x1 + msr sctlr_el1, x0 + isb /* ensure MMU is off */ + dsb sy + ret +endfunc disable_mmu_el1 + + +func disable_mmu_icache_el1 + mov x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT) + b do_disable_mmu_el1 +endfunc disable_mmu_icache_el1 /* --------------------------------------------------------------------------- * Enable the use of VFP at EL3 @@ -169,4 +505,5 @@ func enable_vfp msr cptr_el3, x0 isb ret +endfunc enable_vfp #endif diff --git a/lib/aarch64/xlat_helpers.c b/lib/aarch64/xlat_helpers.c deleted file mode 100644 index d401ffc4..00000000 --- a/lib/aarch64/xlat_helpers.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include <arch.h> -#include <assert.h> - -/******************************************************************************* - * Helper to create a level 1/2 table descriptor which points to a level 2/3 - * table. - ******************************************************************************/ -unsigned long create_table_desc(unsigned long *next_table_ptr) -{ - unsigned long desc = (unsigned long) next_table_ptr; - - /* Clear the last 12 bits */ - desc >>= FOUR_KB_SHIFT; - desc <<= FOUR_KB_SHIFT; - - desc |= TABLE_DESC; - - return desc; -} - -/******************************************************************************* - * Helper to create a level 1/2/3 block descriptor which maps the va to addr - ******************************************************************************/ -unsigned long create_block_desc(unsigned long desc, - unsigned long addr, - unsigned int level) -{ - switch (level) { - case LEVEL1: - desc |= (addr << FIRST_LEVEL_DESC_N) | BLOCK_DESC; - break; - case LEVEL2: - desc |= (addr << SECOND_LEVEL_DESC_N) | BLOCK_DESC; - break; - case LEVEL3: - desc |= (addr << THIRD_LEVEL_DESC_N) | TABLE_DESC; - break; - default: - assert(0); - } - - return desc; -} - -/******************************************************************************* - * Helper to create a level 1/2/3 block descriptor which maps the va to output_ - * addr with Device nGnRE attributes. 
- ******************************************************************************/ -unsigned long create_device_block(unsigned long output_addr, - unsigned int level, - unsigned int ns) -{ - unsigned long upper_attrs, lower_attrs, desc; - - lower_attrs = LOWER_ATTRS(ACCESS_FLAG | OSH | AP_RW); - lower_attrs |= LOWER_ATTRS(ns | ATTR_DEVICE_INDEX); - upper_attrs = UPPER_ATTRS(XN); - desc = upper_attrs | lower_attrs; - - return create_block_desc(desc, output_addr, level); -} - -/******************************************************************************* - * Helper to create a level 1/2/3 block descriptor which maps the va to output_ - * addr with inner-shareable normal wbwa read-only memory attributes. - ******************************************************************************/ -unsigned long create_romem_block(unsigned long output_addr, - unsigned int level, - unsigned int ns) -{ - unsigned long upper_attrs, lower_attrs, desc; - - lower_attrs = LOWER_ATTRS(ACCESS_FLAG | ISH | AP_RO); - lower_attrs |= LOWER_ATTRS(ns | ATTR_IWBWA_OWBWA_NTR_INDEX); - upper_attrs = UPPER_ATTRS(0ull); - desc = upper_attrs | lower_attrs; - - return create_block_desc(desc, output_addr, level); -} - -/******************************************************************************* - * Helper to create a level 1/2/3 block descriptor which maps the va to output_ - * addr with inner-shareable normal wbwa read-write memory attributes. - ******************************************************************************/ -unsigned long create_rwmem_block(unsigned long output_addr, - unsigned int level, - unsigned int ns) -{ - unsigned long upper_attrs, lower_attrs, desc; - - lower_attrs = LOWER_ATTRS(ACCESS_FLAG | ISH | AP_RW); - lower_attrs |= LOWER_ATTRS(ns | ATTR_IWBWA_OWBWA_NTR_INDEX); - upper_attrs = UPPER_ATTRS(XN); - desc = upper_attrs | lower_attrs; - - return create_block_desc(desc, output_addr, level); -} diff --git a/lib/aarch64/xlat_tables.c b/lib/aarch64/xlat_tables.c index ddc9ba88..04cbf3c2 100644 --- a/lib/aarch64/xlat_tables.c +++ b/lib/aarch64/xlat_tables.c @@ -1,351 +1,14 @@ /* - * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ -#include <arch.h> -#include <arch_helpers.h> -#include <assert.h> -#include <cassert.h> -#include <platform_def.h> -#include <string.h> -#include <xlat_tables.h> - - -#ifndef DEBUG_XLAT_TABLE -#define DEBUG_XLAT_TABLE 0 -#endif - -#if DEBUG_XLAT_TABLE -#define debug_print(...) printf(__VA_ARGS__) -#else -#define debug_print(...) ((void)0) -#endif - -CASSERT(ADDR_SPACE_SIZE > 0, assert_valid_addr_space_size); - -#define UNSET_DESC ~0ul - -#define NUM_L1_ENTRIES (ADDR_SPACE_SIZE >> L1_XLAT_ADDRESS_SHIFT) - -static uint64_t l1_xlation_table[NUM_L1_ENTRIES] -__aligned(NUM_L1_ENTRIES * sizeof(uint64_t)); - -static uint64_t xlat_tables[MAX_XLAT_TABLES][XLAT_TABLE_ENTRIES] -__aligned(XLAT_TABLE_SIZE) __attribute__((section("xlat_table"))); - -static unsigned next_xlat; -static unsigned long max_pa; -static unsigned long max_va; -static unsigned long tcr_ps_bits; - /* - * Array of all memory regions stored in order of ascending base address. - * The list is terminated by the first entry with size == 0. + * This file is deprecated and is retained here only for compatibility. + * The xlat_tables library can be found in `lib/xlat_tables` directory. */ -static mmap_region_t mmap[MAX_MMAP_REGIONS + 1]; - - -static void print_mmap(void) -{ -#if DEBUG_XLAT_TABLE - debug_print("mmap:\n"); - mmap_region_t *mm = mmap; - while (mm->size) { - debug_print(" %010lx %010lx %10lx %x\n", mm->base_va, - mm->base_pa, mm->size, mm->attr); - ++mm; - }; - debug_print("\n"); +#if !ERROR_DEPRECATED +#include "../xlat_tables/xlat_tables_common.c" +#include "../xlat_tables/aarch64/xlat_tables.c" #endif -} - -void mmap_add_region(unsigned long base_pa, unsigned long base_va, - unsigned long size, unsigned attr) -{ - mmap_region_t *mm = mmap; - mmap_region_t *mm_last = mm + sizeof(mmap) / sizeof(mmap[0]) - 1; - unsigned long pa_end = base_pa + size - 1; - unsigned long va_end = base_va + size - 1; - - assert(IS_PAGE_ALIGNED(base_pa)); - assert(IS_PAGE_ALIGNED(base_va)); - assert(IS_PAGE_ALIGNED(size)); - - if (!size) - return; - - /* Find correct place in mmap to insert new region */ - while (mm->base_va < base_va && mm->size) - ++mm; - - /* Make room for new region by moving other regions up by one place */ - memmove(mm + 1, mm, (uintptr_t)mm_last - (uintptr_t)mm); - - /* Check we haven't lost the empty sentinal from the end of the array */ - assert(mm_last->size == 0); - - mm->base_pa = base_pa; - mm->base_va = base_va; - mm->size = size; - mm->attr = attr; - - if (pa_end > max_pa) - max_pa = pa_end; - if (va_end > max_va) - max_va = va_end; -} - -void mmap_add(const mmap_region_t *mm) -{ - while (mm->size) { - mmap_add_region(mm->base_pa, mm->base_va, mm->size, mm->attr); - ++mm; - } -} - -static unsigned long mmap_desc(unsigned attr, unsigned long addr_pa, - unsigned level) -{ - unsigned long desc = addr_pa; - - desc |= level == 3 ? TABLE_DESC : BLOCK_DESC; - - desc |= attr & MT_NS ? LOWER_ATTRS(NS) : 0; - - desc |= attr & MT_RW ? 
LOWER_ATTRS(AP_RW) : LOWER_ATTRS(AP_RO); - - desc |= LOWER_ATTRS(ACCESS_FLAG); - - if (attr & MT_MEMORY) { - desc |= LOWER_ATTRS(ATTR_IWBWA_OWBWA_NTR_INDEX | ISH); - if (attr & MT_RW) - desc |= UPPER_ATTRS(XN); - } else { - desc |= LOWER_ATTRS(ATTR_DEVICE_INDEX | OSH); - desc |= UPPER_ATTRS(XN); - } - - debug_print(attr & MT_MEMORY ? "MEM" : "DEV"); - debug_print(attr & MT_RW ? "-RW" : "-RO"); - debug_print(attr & MT_NS ? "-NS" : "-S"); - - return desc; -} - -static int mmap_region_attr(mmap_region_t *mm, unsigned long base_va, - unsigned long size) -{ - int attr = mm->attr; - - for (;;) { - ++mm; - - if (!mm->size) - return attr; /* Reached end of list */ - - if (mm->base_va >= base_va + size) - return attr; /* Next region is after area so end */ - - if (mm->base_va + mm->size <= base_va) - continue; /* Next region has already been overtaken */ - - if ((mm->attr & attr) == attr) - continue; /* Region doesn't override attribs so skip */ - - attr &= mm->attr; - - if (mm->base_va > base_va || - mm->base_va + mm->size < base_va + size) - return -1; /* Region doesn't fully cover our area */ - } -} - -static mmap_region_t *init_xlation_table(mmap_region_t *mm, - unsigned long base_va, - unsigned long *table, unsigned level) -{ - unsigned level_size_shift = L1_XLAT_ADDRESS_SHIFT - (level - 1) * - XLAT_TABLE_ENTRIES_SHIFT; - unsigned level_size = 1 << level_size_shift; - unsigned long level_index_mask = XLAT_TABLE_ENTRIES_MASK << level_size_shift; - - assert(level <= 3); - - debug_print("New xlat table:\n"); - - do { - unsigned long desc = UNSET_DESC; - - if (mm->base_va + mm->size <= base_va) { - /* Area now after the region so skip it */ - ++mm; - continue; - } - - debug_print(" %010lx %8lx " + 6 - 2 * level, base_va, - level_size); - - if (mm->base_va >= base_va + level_size) { - /* Next region is after area so nothing to map yet */ - desc = INVALID_DESC; - } else if (mm->base_va <= base_va && mm->base_va + mm->size >= - base_va + level_size) { - /* Next region covers all of area */ - int attr = mmap_region_attr(mm, base_va, level_size); - if (attr >= 0) - desc = mmap_desc(attr, - base_va - mm->base_va + mm->base_pa, - level); - } - /* else Next region only partially covers area, so need */ - - if (desc == UNSET_DESC) { - /* Area not covered by a region so need finer table */ - unsigned long *new_table = xlat_tables[next_xlat++]; - assert(next_xlat <= MAX_XLAT_TABLES); - desc = TABLE_DESC | (unsigned long)new_table; - - /* Recurse to fill in new table */ - mm = init_xlation_table(mm, base_va, - new_table, level+1); - } - - debug_print("\n"); - - *table++ = desc; - base_va += level_size; - } while (mm->size && (base_va & level_index_mask)); - - return mm; -} - -static unsigned int calc_physical_addr_size_bits(unsigned long max_addr) -{ - /* Physical address can't exceed 48 bits */ - assert((max_addr & ADDR_MASK_48_TO_63) == 0); - - /* 48 bits address */ - if (max_addr & ADDR_MASK_44_TO_47) - return TCR_PS_BITS_256TB; - - /* 44 bits address */ - if (max_addr & ADDR_MASK_42_TO_43) - return TCR_PS_BITS_16TB; - - /* 42 bits address */ - if (max_addr & ADDR_MASK_40_TO_41) - return TCR_PS_BITS_4TB; - - /* 40 bits address */ - if (max_addr & ADDR_MASK_36_TO_39) - return TCR_PS_BITS_1TB; - - /* 36 bits address */ - if (max_addr & ADDR_MASK_32_TO_35) - return TCR_PS_BITS_64GB; - - return TCR_PS_BITS_4GB; -} - -void init_xlat_tables(void) -{ - print_mmap(); - init_xlation_table(mmap, 0, l1_xlation_table, 1); - tcr_ps_bits = calc_physical_addr_size_bits(max_pa); - assert(max_va < ADDR_SPACE_SIZE); -} - 
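/*
 * For context, this removed API (now maintained under lib/xlat_tables) is
 * typically driven as in the sketch below; the region names and values are
 * hypothetical, for illustration only:
 *
 *   mmap_add_region(DEVICE_BASE, DEVICE_BASE, DEVICE_SIZE, MT_RW | MT_NS);
 *   mmap_add_region(DRAM_BASE, DRAM_BASE, DRAM_SIZE,
 *                   MT_MEMORY | MT_RW | MT_NS);
 *   init_xlat_tables();
 *   enable_mmu_el3(0);   // flags == 0 keeps the data cache enabled
 */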
-/******************************************************************************* - * Macro generating the code for the function enabling the MMU in the given - * exception level, assuming that the pagetables have already been created. - * - * _el: Exception level at which the function will run - * _tcr_extra: Extra bits to set in the TCR register. This mask will - * be OR'ed with the default TCR value. - * _tlbi_fct: Function to invalidate the TLBs at the current - * exception level - ******************************************************************************/ -#define DEFINE_ENABLE_MMU_EL(_el, _tcr_extra, _tlbi_fct) \ - void enable_mmu_el##_el(uint32_t flags) \ - { \ - uint64_t mair, tcr, ttbr; \ - uint32_t sctlr; \ - \ - assert(IS_IN_EL(_el)); \ - assert((read_sctlr_el##_el() & SCTLR_M_BIT) == 0); \ - \ - /* Set attributes in the right indices of the MAIR */ \ - mair = MAIR_ATTR_SET(ATTR_DEVICE, ATTR_DEVICE_INDEX); \ - mair |= MAIR_ATTR_SET(ATTR_IWBWA_OWBWA_NTR, \ - ATTR_IWBWA_OWBWA_NTR_INDEX); \ - write_mair_el##_el(mair); \ - \ - /* Invalidate TLBs at the current exception level */ \ - _tlbi_fct(); \ - \ - /* Set TCR bits as well. */ \ - /* Inner & outer WBWA & shareable + T0SZ = 32 */ \ - tcr = TCR_SH_INNER_SHAREABLE | TCR_RGN_OUTER_WBA | \ - TCR_RGN_INNER_WBA | \ - (64 - __builtin_ctzl(ADDR_SPACE_SIZE)); \ - tcr |= _tcr_extra; \ - write_tcr_el##_el(tcr); \ - \ - /* Set TTBR bits as well */ \ - ttbr = (uint64_t) l1_xlation_table; \ - write_ttbr0_el##_el(ttbr); \ - \ - /* Ensure all translation table writes have drained */ \ - /* into memory, the TLB invalidation is complete, */ \ - /* and translation register writes are committed */ \ - /* before enabling the MMU */ \ - dsb(); \ - isb(); \ - \ - sctlr = read_sctlr_el##_el(); \ - sctlr |= SCTLR_WXN_BIT | SCTLR_M_BIT; \ - \ - if (flags & DISABLE_DCACHE) \ - sctlr &= ~SCTLR_C_BIT; \ - else \ - sctlr |= SCTLR_C_BIT; \ - \ - write_sctlr_el##_el(sctlr); \ - \ - /* Ensure the MMU enable takes effect immediately */ \ - isb(); \ - } - -/* Define EL1 and EL3 variants of the function enabling the MMU */ -DEFINE_ENABLE_MMU_EL(1, - (tcr_ps_bits << TCR_EL1_IPS_SHIFT), - tlbivmalle1) -DEFINE_ENABLE_MMU_EL(3, - TCR_EL3_RES1 | (tcr_ps_bits << TCR_EL3_PS_SHIFT), - tlbialle3) diff --git a/lib/compiler-rt/LICENSE.TXT b/lib/compiler-rt/LICENSE.TXT new file mode 100644 index 00000000..a17dc12b --- /dev/null +++ b/lib/compiler-rt/LICENSE.TXT @@ -0,0 +1,91 @@ +============================================================================== +compiler_rt License +============================================================================== + +The compiler_rt library is dual licensed under both the University of Illinois +"BSD-Like" license and the MIT license. As a user of this code you may choose +to use it under either license. As a contributor, you agree to allow your code +to be used under both. + +Full text of the relevant licenses is included below. + +============================================================================== + +University of Illinois/NCSA +Open Source License + +Copyright (c) 2009-2016 by the contributors listed in CREDITS.TXT + +All rights reserved. 
+ +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + +============================================================================== + +Copyright (c) 2009-2015 by the contributors listed in CREDITS.TXT + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +============================================================================== +Copyrights and Licenses for Third Party Software Distributed with LLVM: +============================================================================== +The LLVM software contains code written by third parties. Such software will +have its own individual LICENSE.TXT file in the directory in which it appears. +This file will describe the copyrights, license, and restrictions which apply +to that code. 
+ +The disclaimer of warranty in the University of Illinois Open Source License +applies to all code in the LLVM Distribution, and nothing in any of the +other licenses gives permission to use the names of the LLVM Team or the +University of Illinois to endorse or promote products derived from this +Software. + diff --git a/lib/compiler-rt/builtins/arm/aeabi_uldivmod.S b/lib/compiler-rt/builtins/arm/aeabi_uldivmod.S new file mode 100644 index 00000000..be343b6b --- /dev/null +++ b/lib/compiler-rt/builtins/arm/aeabi_uldivmod.S @@ -0,0 +1,46 @@ +//===-- aeabi_uldivmod.S - EABI uldivmod implementation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// struct { uint64_t quot, uint64_t rem} +// __aeabi_uldivmod(uint64_t numerator, uint64_t denominator) { +// uint64_t rem, quot; +// quot = __udivmoddi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + +#if defined(__MINGW32__) +#define __aeabi_uldivmod __rt_udiv64 +#endif + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod) + push {r6, lr} + sub sp, sp, #16 + add r6, sp, #8 + str r6, [sp] +#if defined(__MINGW32__) + movs r6, r0 + movs r0, r2 + movs r2, r6 + movs r6, r1 + movs r1, r3 + movs r3, r6 +#endif + bl SYMBOL_NAME(__udivmoddi4) + ldr r2, [sp, #8] + ldr r3, [sp, #12] + add sp, sp, #16 + pop {r6, pc} +END_COMPILERRT_FUNCTION(__aeabi_uldivmod) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/lib/compiler-rt/builtins/assembly.h b/lib/compiler-rt/builtins/assembly.h new file mode 100644 index 00000000..29d9f884 --- /dev/null +++ b/lib/compiler-rt/builtins/assembly.h @@ -0,0 +1,169 @@ +/* ===-- assembly.h - compiler-rt assembler support macros -----------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file defines macros for use in compiler-rt assembler source. + * This file is not part of the interface of this library. 
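+ *
+ * A builtin implemented in assembly is typically bracketed with the
+ * macros defined below, as aeabi_uldivmod.S earlier in this diff does
+ * (sketch):
+ *
+ *   DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod)
+ *       ...function body...
+ *   END_COMPILERRT_FUNCTION(__aeabi_uldivmod)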
+ * + * ===----------------------------------------------------------------------=== + */ + +#ifndef COMPILERRT_ASSEMBLY_H +#define COMPILERRT_ASSEMBLY_H + +#if defined(__POWERPC__) || defined(__powerpc__) || defined(__ppc__) +#define SEPARATOR @ +#else +#define SEPARATOR ; +#endif + +#if defined(__APPLE__) +#define HIDDEN(name) .private_extern name +#define LOCAL_LABEL(name) L_##name +// tell linker it can break up file at label boundaries +#define FILE_LEVEL_DIRECTIVE .subsections_via_symbols +#define SYMBOL_IS_FUNC(name) +#define CONST_SECTION .const + +#define NO_EXEC_STACK_DIRECTIVE + +#elif defined(__ELF__) + +#define HIDDEN(name) .hidden name +#define LOCAL_LABEL(name) .L_##name +#define FILE_LEVEL_DIRECTIVE +#if defined(__arm__) +#define SYMBOL_IS_FUNC(name) .type name,%function +#else +#define SYMBOL_IS_FUNC(name) .type name,@function +#endif +#define CONST_SECTION .section .rodata + +#if defined(__GNU__) || defined(__ANDROID__) || defined(__FreeBSD__) +#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits +#else +#define NO_EXEC_STACK_DIRECTIVE +#endif + +#else // !__APPLE__ && !__ELF__ + +#define HIDDEN(name) +#define LOCAL_LABEL(name) .L ## name +#define FILE_LEVEL_DIRECTIVE +#define SYMBOL_IS_FUNC(name) \ + .def name SEPARATOR \ + .scl 2 SEPARATOR \ + .type 32 SEPARATOR \ + .endef +#define CONST_SECTION .section .rdata,"rd" + +#define NO_EXEC_STACK_DIRECTIVE + +#endif + +#if defined(__arm__) +#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5 +#define ARM_HAS_BX +#endif +#if !defined(__ARM_FEATURE_CLZ) && __ARM_ARCH_ISA_THUMB != 1 && \ + (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__))) +#define __ARM_FEATURE_CLZ +#endif + +#ifdef ARM_HAS_BX +#define JMP(r) bx r +#define JMPc(r, c) bx##c r +#else +#define JMP(r) mov pc, r +#define JMPc(r, c) mov##c pc, r +#endif + +// pop {pc} can't switch Thumb mode on ARMv4T +#if __ARM_ARCH >= 5 +#define POP_PC() pop {pc} +#else +#define POP_PC() \ + pop {ip}; \ + JMP(ip) +#endif + +#if __ARM_ARCH_ISA_THUMB == 2 +#define IT(cond) it cond +#define ITT(cond) itt cond +#else +#define IT(cond) +#define ITT(cond) +#endif + +#if __ARM_ARCH_ISA_THUMB == 2 +#define WIDE(op) op.w +#else +#define WIDE(op) op +#endif +#endif + +#define GLUE2(a, b) a##b +#define GLUE(a, b) GLUE2(a, b) +#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name) + +#ifdef VISIBILITY_HIDDEN +#define DECLARE_SYMBOL_VISIBILITY(name) \ + HIDDEN(SYMBOL_NAME(name)) SEPARATOR +#else +#define DECLARE_SYMBOL_VISIBILITY(name) +#endif + +#define DEFINE_COMPILERRT_FUNCTION(name) \ + FILE_LEVEL_DIRECTIVE SEPARATOR \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY(name) \ + SYMBOL_NAME(name): + +#define DEFINE_COMPILERRT_THUMB_FUNCTION(name) \ + FILE_LEVEL_DIRECTIVE SEPARATOR \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR \ + .thumb_func SEPARATOR \ + SYMBOL_NAME(name): + +#define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name) \ + FILE_LEVEL_DIRECTIVE SEPARATOR \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + HIDDEN(SYMBOL_NAME(name)) SEPARATOR \ + SYMBOL_NAME(name): + +#define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name) \ + .globl name SEPARATOR \ + SYMBOL_IS_FUNC(name) SEPARATOR \ + HIDDEN(name) SEPARATOR \ + name: + +#define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target) \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + 
DECLARE_SYMBOL_VISIBILITY(SYMBOL_NAME(name)) SEPARATOR \ + .set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR + +#if defined(__ARM_EABI__) +#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) \ + DEFINE_COMPILERRT_FUNCTION_ALIAS(aeabi_name, name) +#else +#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) +#endif + +#ifdef __ELF__ +#define END_COMPILERRT_FUNCTION(name) \ + .size SYMBOL_NAME(name), . - SYMBOL_NAME(name) +#else +#define END_COMPILERRT_FUNCTION(name) +#endif + +#endif /* COMPILERRT_ASSEMBLY_H */ diff --git a/lib/compiler-rt/builtins/ctzdi2.c b/lib/compiler-rt/builtins/ctzdi2.c new file mode 100644 index 00000000..db3c6fdc --- /dev/null +++ b/lib/compiler-rt/builtins/ctzdi2.c @@ -0,0 +1,29 @@ +/* ===-- ctzdi2.c - Implement __ctzdi2 -------------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __ctzdi2 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Returns: the number of trailing 0-bits */ + +/* Precondition: a != 0 */ + +COMPILER_RT_ABI si_int +__ctzdi2(di_int a) +{ + dwords x; + x.all = a; + const si_int f = -(x.s.low == 0); + return __builtin_ctz((x.s.high & f) | (x.s.low & ~f)) + + (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); +} diff --git a/lib/compiler-rt/builtins/int_endianness.h b/lib/compiler-rt/builtins/int_endianness.h new file mode 100644 index 00000000..7995ddbb --- /dev/null +++ b/lib/compiler-rt/builtins/int_endianness.h @@ -0,0 +1,116 @@ +/* ===-- int_endianness.h - configuration header for compiler-rt ------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file is a configuration header for compiler-rt. + * This file is not part of the interface of this library. + * + * ===----------------------------------------------------------------------=== + */ + +#ifndef INT_ENDIANNESS_H +#define INT_ENDIANNESS_H + +#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \ + defined(__ORDER_LITTLE_ENDIAN__) + +/* Clang and GCC provide built-in endianness definitions. */ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif /* __BYTE_ORDER__ */ + +#else /* Compilers other than Clang or GCC. */ + +#if defined(__SVR4) && defined(__sun) +#include <sys/byteorder.h> + +#if defined(_BIG_ENDIAN) +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif defined(_LITTLE_ENDIAN) +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#else /* !_LITTLE_ENDIAN */ +#error "unknown endianness" +#endif /* !_LITTLE_ENDIAN */ + +#endif /* Solaris and AuroraUX. */ + +/* .. 
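
A note on the __ctzdi2 implementation above: it selects between the two 32-bit halves without branching. f is all-ones exactly when the low word is zero, so (high & f) | (low & ~f) picks whichever half contains the lowest set bit, and f & 32 adds the word offset. A standalone C sketch of the same trick, using plain fixed-width types instead of the int_types.h typedefs and assuming a GCC/Clang-style __builtin_ctz:

    #include <assert.h>
    #include <stdint.h>

    /* Branchless trailing-zero count for 64 bits, mirroring __ctzdi2.
     * Precondition: a != 0, as in the original. */
    static int ctz64(uint64_t a)
    {
        uint32_t lo = (uint32_t)a;
        uint32_t hi = (uint32_t)(a >> 32);
        uint32_t f = -(uint32_t)(lo == 0);  /* 0xFFFFFFFF iff lo == 0 */
        return __builtin_ctz((hi & f) | (lo & ~f)) + (int)(f & 32);
    }

    int main(void)
    {
        assert(ctz64(1) == 0);
        assert(ctz64(0x80000000u) == 31);
        assert(ctz64(1ull << 32) == 32);
        assert(ctz64(1ull << 63) == 63);
        return 0;
    }
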
*/ + +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \ + defined(__minix) +#include <sys/endian.h> + +#if _BYTE_ORDER == _BIG_ENDIAN +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif _BYTE_ORDER == _LITTLE_ENDIAN +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif /* _BYTE_ORDER */ + +#endif /* *BSD */ + +#if defined(__OpenBSD__) || defined(__Bitrig__) +#include <machine/endian.h> + +#if _BYTE_ORDER == _BIG_ENDIAN +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#elif _BYTE_ORDER == _LITTLE_ENDIAN +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif /* _BYTE_ORDER */ + +#endif /* OpenBSD and Bitrig. */ + +/* .. */ + +/* Mac OSX has __BIG_ENDIAN__ or __LITTLE_ENDIAN__ automatically set by the + * compiler (at least with GCC) */ +#if defined(__APPLE__) || defined(__ellcc__ ) + +#ifdef __BIG_ENDIAN__ +#if __BIG_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 1 +#endif +#endif /* __BIG_ENDIAN__ */ + +#ifdef __LITTLE_ENDIAN__ +#if __LITTLE_ENDIAN__ +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 +#endif +#endif /* __LITTLE_ENDIAN__ */ + +#endif /* Mac OSX */ + +/* .. */ + +#if defined(_WIN32) + +#define _YUGA_LITTLE_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 0 + +#endif /* Windows */ + +#endif /* Clang or GCC. */ + +/* . */ + +#if !defined(_YUGA_LITTLE_ENDIAN) || !defined(_YUGA_BIG_ENDIAN) +#error Unable to determine endian +#endif /* Check we found an endianness correctly. */ + +#endif /* INT_ENDIANNESS_H */ diff --git a/lib/compiler-rt/builtins/int_lib.h b/lib/compiler-rt/builtins/int_lib.h new file mode 100644 index 00000000..57dfc413 --- /dev/null +++ b/lib/compiler-rt/builtins/int_lib.h @@ -0,0 +1,127 @@ +/* ===-- int_lib.h - configuration header for compiler-rt -----------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file is a configuration header for compiler-rt. + * This file is not part of the interface of this library. + * + * ===----------------------------------------------------------------------=== + */ + +/* + * Portions copyright (c) 2017, ARM Limited and Contributors. + * All rights reserved. + */ + +#ifndef INT_LIB_H +#define INT_LIB_H + +/* Assumption: Signed integral is 2's complement. */ +/* Assumption: Right shift of signed negative is arithmetic shift. */ +/* Assumption: Endianness is little or big (not mixed). 
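
The three assumptions just listed are not guaranteed by ISO C (the result of right-shifting a negative signed value, in particular, is implementation-defined), which is why the library states them explicitly. A small self-test that spot-checks them on a given toolchain; the test values are illustrative:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
        /* Two's complement: bitwise AND on -1 sees an all-ones pattern. */
        assert((-1 & 3) == 3);

        /* Right shift of a negative signed value sign-extends. */
        assert((-2 >> 1) == -1);

        /* Endianness is purely little or big: the first byte of a 32-bit
         * integer is either its least or most significant byte. */
        uint32_t v = 0x01020304u;
        unsigned char b[4];
        memcpy(b, &v, sizeof(b));
        assert((b[0] == 0x04 && b[3] == 0x01) ||
               (b[0] == 0x01 && b[3] == 0x04));
        return 0;
    }
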
*/ + +#if defined(__ELF__) +#define FNALIAS(alias_name, original_name) \ + void alias_name() __attribute__((alias(#original_name))) +#else +#define FNALIAS(alias, name) _Pragma("GCC error(\"alias unsupported on this file format\")") +#endif + +/* ABI macro definitions */ + +#if __ARM_EABI__ +# define ARM_EABI_FNALIAS(aeabi_name, name) \ + void __aeabi_##aeabi_name() __attribute__((alias("__" #name))); +# ifdef COMPILER_RT_ARMHF_TARGET +# define COMPILER_RT_ABI +# else +# define COMPILER_RT_ABI __attribute__((pcs("aapcs"))) +# endif +#else +# define ARM_EABI_FNALIAS(aeabi_name, name) +# define COMPILER_RT_ABI +#endif + +#ifdef _MSC_VER +#define ALWAYS_INLINE __forceinline +#define NOINLINE __declspec(noinline) +#define NORETURN __declspec(noreturn) +#define UNUSED +#else +#define ALWAYS_INLINE __attribute__((always_inline)) +#define NOINLINE __attribute__((noinline)) +#define NORETURN __attribute__((noreturn)) +#define UNUSED __attribute__((unused)) +#endif + +/* + * Kernel and boot environment can't use normal headers, + * so use the equivalent system headers. + */ +# include <sys/limits.h> +# include <sys/stdint.h> +# include <sys/types.h> + +/* Include the commonly used internal type definitions. */ +#include "int_types.h" + +COMPILER_RT_ABI si_int __paritysi2(si_int a); +COMPILER_RT_ABI si_int __paritydi2(di_int a); + +COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b); +COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b); +COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d); + +COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int* rem); +COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int* rem); +#ifdef CRT_HAS_128BIT +COMPILER_RT_ABI si_int __clzti2(ti_int a); +COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); +#endif + +/* Definitions for builtins unavailable on MSVC */ +#if defined(_MSC_VER) && !defined(__clang__) +#include <intrin.h> + +uint32_t __inline __builtin_ctz(uint32_t value) { + unsigned long trailing_zero = 0; + if (_BitScanForward(&trailing_zero, value)) + return trailing_zero; + return 32; +} + +uint32_t __inline __builtin_clz(uint32_t value) { + unsigned long leading_zero = 0; + if (_BitScanReverse(&leading_zero, value)) + return 31 - leading_zero; + return 32; +} + +#if defined(_M_ARM) || defined(_M_X64) +uint32_t __inline __builtin_clzll(uint64_t value) { + unsigned long leading_zero = 0; + if (_BitScanReverse64(&leading_zero, value)) + return 63 - leading_zero; + return 64; +} +#else +uint32_t __inline __builtin_clzll(uint64_t value) { + if (value == 0) + return 64; + uint32_t msh = (uint32_t)(value >> 32); + uint32_t lsh = (uint32_t)(value & 0xFFFFFFFF); + if (msh != 0) + return __builtin_clz(msh); + return 32 + __builtin_clz(lsh); +} +#endif + +#define __builtin_clzl __builtin_clzll +#endif /* defined(_MSC_VER) && !defined(__clang__) */ + +#endif /* INT_LIB_H */ diff --git a/lib/compiler-rt/builtins/int_math.h b/lib/compiler-rt/builtins/int_math.h new file mode 100644 index 00000000..fc81fb7f --- /dev/null +++ b/lib/compiler-rt/builtins/int_math.h @@ -0,0 +1,114 @@ +/* ===-- int_math.h - internal math inlines ---------------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===-----------------------------------------------------------------------=== + * + * This file is not part of the interface of this library. 
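
The portable __builtin_clzll fallback above (for MSVC targets without _BitScanReverse64) splits the 64-bit value into two 32-bit halves and counts in whichever half holds the most significant set bit. A quick equivalence check against a bit-by-bit reference; clz32 here is a stand-in for the 32-bit builtin and, like it, requires a nonzero argument:

    #include <assert.h>
    #include <stdint.h>

    /* Stand-in for __builtin_clz; v must be nonzero. */
    static uint32_t clz32(uint32_t v)
    {
        uint32_t n = 0;
        while (!(v & 0x80000000u)) { v <<= 1; n++; }
        return n;
    }

    /* Mirrors the split fallback from int_lib.h; 64 for zero input. */
    static uint32_t clz64_split(uint64_t value)
    {
        if (value == 0)
            return 64;
        uint32_t msh = (uint32_t)(value >> 32);
        uint32_t lsh = (uint32_t)(value & 0xFFFFFFFFu);
        return msh != 0 ? clz32(msh) : 32 + clz32(lsh);
    }

    /* Naive reference: scan from the top bit down. */
    static uint32_t clz64_ref(uint64_t v)
    {
        uint32_t n = 0;
        while (n < 64 && !(v & (1ull << (63 - n))))
            n++;
        return n;
    }

    int main(void)
    {
        uint64_t cases[] = { 0, 1, 0x80000000ull, 0x100000000ull,
                             0x8000000000000000ull, 0x00000000FFFFFFFFull };
        for (unsigned i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
            assert(clz64_split(cases[i]) == clz64_ref(cases[i]));
        return 0;
    }
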
+ * + * This file defines substitutes for the libm functions used in some of the + * compiler-rt implementations, defined in such a way that there is not a direct + * dependency on libm or math.h. Instead, we use the compiler builtin versions + * where available. This reduces our dependencies on the system SDK by foisting + * the responsibility onto the compiler. + * + * ===-----------------------------------------------------------------------=== + */ + +#ifndef INT_MATH_H +#define INT_MATH_H + +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#include <math.h> +#include <stdlib.h> +#include <ymath.h> +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define CRT_INFINITY INFINITY +#else +#define CRT_INFINITY __builtin_huge_valf() +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_isfinite(x) _finite((x)) +#define crt_isinf(x) !_finite((x)) +#define crt_isnan(x) _isnan((x)) +#else +/* Define crt_isfinite in terms of the builtin if available, otherwise provide + * an alternate version in terms of our other functions. This supports some + * versions of GCC which didn't have __builtin_isfinite. + */ +#if __has_builtin(__builtin_isfinite) +# define crt_isfinite(x) __builtin_isfinite((x)) +#elif defined(__GNUC__) +# define crt_isfinite(x) \ + __extension__(({ \ + __typeof((x)) x_ = (x); \ + !crt_isinf(x_) && !crt_isnan(x_); \ + })) +#else +# error "Do not know how to check for infinity" +#endif /* __has_builtin(__builtin_isfinite) */ +#define crt_isinf(x) __builtin_isinf((x)) +#define crt_isnan(x) __builtin_isnan((x)) +#endif /* _MSC_VER */ + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_copysign(x, y) copysign((x), (y)) +#define crt_copysignf(x, y) copysignf((x), (y)) +#define crt_copysignl(x, y) copysignl((x), (y)) +#else +#define crt_copysign(x, y) __builtin_copysign((x), (y)) +#define crt_copysignf(x, y) __builtin_copysignf((x), (y)) +#define crt_copysignl(x, y) __builtin_copysignl((x), (y)) +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_fabs(x) fabs((x)) +#define crt_fabsf(x) fabsf((x)) +#define crt_fabsl(x) fabs((x)) +#else +#define crt_fabs(x) __builtin_fabs((x)) +#define crt_fabsf(x) __builtin_fabsf((x)) +#define crt_fabsl(x) __builtin_fabsl((x)) +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_fmax(x, y) __max((x), (y)) +#define crt_fmaxf(x, y) __max((x), (y)) +#define crt_fmaxl(x, y) __max((x), (y)) +#else +#define crt_fmax(x, y) __builtin_fmax((x), (y)) +#define crt_fmaxf(x, y) __builtin_fmaxf((x), (y)) +#define crt_fmaxl(x, y) __builtin_fmaxl((x), (y)) +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_logb(x) logb((x)) +#define crt_logbf(x) logbf((x)) +#define crt_logbl(x) logbl((x)) +#else +#define crt_logb(x) __builtin_logb((x)) +#define crt_logbf(x) __builtin_logbf((x)) +#define crt_logbl(x) __builtin_logbl((x)) +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define crt_scalbn(x, y) scalbn((x), (y)) +#define crt_scalbnf(x, y) scalbnf((x), (y)) +#define crt_scalbnl(x, y) scalbnl((x), (y)) +#else +#define crt_scalbn(x, y) __builtin_scalbn((x), (y)) +#define crt_scalbnf(x, y) __builtin_scalbnf((x), (y)) +#define crt_scalbnl(x, y) __builtin_scalbnl((x), (y)) +#endif + +#endif /* INT_MATH_H */ diff --git a/lib/compiler-rt/builtins/int_types.h b/lib/compiler-rt/builtins/int_types.h new file mode 100644 index 00000000..660385ec --- /dev/null +++ b/lib/compiler-rt/builtins/int_types.h @@ -0,0 +1,166 @@ +/* ===-- int_lib.h 
- configuration header for compiler-rt -----------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file is not part of the interface of this library. + * + * This file defines various standard types, most importantly a number of unions + * used to access parts of larger types. + * + * ===----------------------------------------------------------------------=== + */ + +#ifndef INT_TYPES_H +#define INT_TYPES_H + +#include "int_endianness.h" + +/* si_int is defined in Linux sysroot's asm-generic/siginfo.h */ +#ifdef si_int +#undef si_int +#endif +typedef int si_int; +typedef unsigned su_int; + +typedef long long di_int; +typedef unsigned long long du_int; + +typedef union +{ + di_int all; + struct + { +#if _YUGA_LITTLE_ENDIAN + su_int low; + si_int high; +#else + si_int high; + su_int low; +#endif /* _YUGA_LITTLE_ENDIAN */ + }s; +} dwords; + +typedef union +{ + du_int all; + struct + { +#if _YUGA_LITTLE_ENDIAN + su_int low; + su_int high; +#else + su_int high; + su_int low; +#endif /* _YUGA_LITTLE_ENDIAN */ + }s; +} udwords; + +/* MIPS64 issue: PR 20098 */ +#if (defined(__LP64__) || defined(__wasm__)) && \ + !(defined(__mips__) && defined(__clang__)) +#define CRT_HAS_128BIT +#endif + +#ifdef CRT_HAS_128BIT +typedef int ti_int __attribute__ ((mode (TI))); +typedef unsigned tu_int __attribute__ ((mode (TI))); + +typedef union +{ + ti_int all; + struct + { +#if _YUGA_LITTLE_ENDIAN + du_int low; + di_int high; +#else + di_int high; + du_int low; +#endif /* _YUGA_LITTLE_ENDIAN */ + }s; +} twords; + +typedef union +{ + tu_int all; + struct + { +#if _YUGA_LITTLE_ENDIAN + du_int low; + du_int high; +#else + du_int high; + du_int low; +#endif /* _YUGA_LITTLE_ENDIAN */ + }s; +} utwords; + +static __inline ti_int make_ti(di_int h, di_int l) { + twords r; + r.s.high = h; + r.s.low = l; + return r.all; +} + +static __inline tu_int make_tu(du_int h, du_int l) { + utwords r; + r.s.high = h; + r.s.low = l; + return r.all; +} + +#endif /* CRT_HAS_128BIT */ + +typedef union +{ + su_int u; + float f; +} float_bits; + +typedef union +{ + udwords u; + double f; +} double_bits; + +typedef struct +{ +#if _YUGA_LITTLE_ENDIAN + udwords low; + udwords high; +#else + udwords high; + udwords low; +#endif /* _YUGA_LITTLE_ENDIAN */ +} uqwords; + +typedef union +{ + uqwords u; + long double f; +} long_double_bits; + +#if __STDC_VERSION__ >= 199901L +typedef float _Complex Fcomplex; +typedef double _Complex Dcomplex; +typedef long double _Complex Lcomplex; + +#define COMPLEX_REAL(x) __real__(x) +#define COMPLEX_IMAGINARY(x) __imag__(x) +#else +typedef struct { float real, imaginary; } Fcomplex; + +typedef struct { double real, imaginary; } Dcomplex; + +typedef struct { long double real, imaginary; } Lcomplex; + +#define COMPLEX_REAL(x) (x).real +#define COMPLEX_IMAGINARY(x) (x).imaginary +#endif +#endif /* INT_TYPES_H */ + diff --git a/lib/compiler-rt/builtins/udivmoddi4.c b/lib/compiler-rt/builtins/udivmoddi4.c new file mode 100644 index 00000000..0c8b4ff4 --- /dev/null +++ b/lib/compiler-rt/builtins/udivmoddi4.c @@ -0,0 +1,231 @@ +/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. 
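
The dwords/udwords unions defined in int_types.h above are the workhorse of the 64-bit builtins: they let a double-word value be read and written as two machine words, with the field order chosen by int_endianness.h. A standalone sketch, fixed to the little-endian field order for brevity (so it only passes on a little-endian host):

    #include <assert.h>
    #include <stdint.h>

    /* Analogue of udwords, little-endian layout hard-coded. */
    typedef union {
        uint64_t all;
        struct {
            uint32_t low;   /* least significant word comes first */
            uint32_t high;
        } s;
    } udwords64;

    int main(void)
    {
        udwords64 n;
        n.all = 0x1122334455667788ull;
        assert(n.s.low  == 0x55667788u);
        assert(n.s.high == 0x11223344u);

        /* Rebuilding a value from halves, as make_tu() does for 128 bits. */
        udwords64 r;
        r.s.high = 0x11223344u;
        r.s.low  = 0x55667788u;
        assert(r.all == n.all);
        return 0;
    }
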
+ * + * ===----------------------------------------------------------------------=== + * + * This file implements __udivmoddi4 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#include "int_lib.h" + +/* Effects: if rem != 0, *rem = a % b + * Returns: a / b + */ + +/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ + +COMPILER_RT_ABI du_int +__udivmoddi4(du_int a, du_int b, du_int* rem) +{ + const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; + const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; + udwords n; + n.all = a; + udwords d; + d.all = b; + udwords q; + udwords r; + unsigned sr; + /* special cases, X is unknown, K != 0 */ + if (n.s.high == 0) + { + if (d.s.high == 0) + { + /* 0 X + * --- + * 0 X + */ + if (rem) + *rem = n.s.low % d.s.low; + return n.s.low / d.s.low; + } + /* 0 X + * --- + * K X + */ + if (rem) + *rem = n.s.low; + return 0; + } + /* n.s.high != 0 */ + if (d.s.low == 0) + { + if (d.s.high == 0) + { + /* K X + * --- + * 0 0 + */ + if (rem) + *rem = n.s.high % d.s.low; + return n.s.high / d.s.low; + } + /* d.s.high != 0 */ + if (n.s.low == 0) + { + /* K 0 + * --- + * K 0 + */ + if (rem) + { + r.s.high = n.s.high % d.s.high; + r.s.low = 0; + *rem = r.all; + } + return n.s.high / d.s.high; + } + /* K K + * --- + * K 0 + */ + if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ + { + if (rem) + { + r.s.low = n.s.low; + r.s.high = n.s.high & (d.s.high - 1); + *rem = r.all; + } + return n.s.high >> __builtin_ctz(d.s.high); + } + /* K K + * --- + * K 0 + */ + sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); + /* 0 <= sr <= n_uword_bits - 2 or sr large */ + if (sr > n_uword_bits - 2) + { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + /* 1 <= sr <= n_uword_bits - 1 */ + /* q.all = n.all << (n_udword_bits - sr); */ + q.s.low = 0; + q.s.high = n.s.low << (n_uword_bits - sr); + /* r.all = n.all >> sr; */ + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } + else /* d.s.low != 0 */ + { + if (d.s.high == 0) + { + /* K X + * --- + * 0 K + */ + if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ + { + if (rem) + *rem = n.s.low & (d.s.low - 1); + if (d.s.low == 1) + return n.all; + sr = __builtin_ctz(d.s.low); + q.s.high = n.s.high >> sr; + q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + return q.all; + } + /* K X + * --- + * 0 K + */ + sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high); + /* 2 <= sr <= n_udword_bits - 1 + * q.all = n.all << (n_udword_bits - sr); + * r.all = n.all >> sr; + */ + if (sr == n_uword_bits) + { + q.s.low = 0; + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } + else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1 + { + q.s.low = 0; + q.s.high = n.s.low << (n_uword_bits - sr); + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } + else // n_uword_bits + 1 <= sr <= n_udword_bits - 1 + { + q.s.low = n.s.low << (n_udword_bits - sr); + q.s.high = (n.s.high << (n_udword_bits - sr)) | + (n.s.low >> (sr - n_uword_bits)); + r.s.high = 0; + r.s.low = n.s.high >> (sr - n_uword_bits); + } + } + else + { + /* K X + * --- + * K K + */ + sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); + /* 0 <= sr <= n_uword_bits - 1 or sr large */ + if (sr > n_uword_bits - 1) + { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + /* 1 <= sr <= n_uword_bits */ + /* q.all = n.all << 
(n_udword_bits - sr); */ + q.s.low = 0; + if (sr == n_uword_bits) + { + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } + else + { + q.s.high = n.s.low << (n_uword_bits - sr); + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } + } + } + /* Not a special case + * q and r are initialized with: + * q.all = n.all << (n_udword_bits - sr); + * r.all = n.all >> sr; + * 1 <= sr <= n_udword_bits - 1 + */ + su_int carry = 0; + for (; sr > 0; --sr) + { + /* r:q = ((r:q) << 1) | carry */ + r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); + r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); + q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); + q.s.low = (q.s.low << 1) | carry; + /* carry = 0; + * if (r.all >= d.all) + * { + * r.all -= d.all; + * carry = 1; + * } + */ + const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); + carry = s & 1; + r.all -= d.all & s; + } + q.all = (q.all << 1) | carry; + if (rem) + *rem = r.all; + return q.all; +} diff --git a/lib/compiler-rt/compiler-rt.mk b/lib/compiler-rt/compiler-rt.mk new file mode 100644 index 00000000..cb5ab31c --- /dev/null +++ b/lib/compiler-rt/compiler-rt.mk @@ -0,0 +1,35 @@ +# +# Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# Neither the name of ARM nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific +# prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# + +ifeq (${ARCH},aarch32) +COMPILER_RT_SRCS := lib/compiler-rt/builtins/arm/aeabi_uldivmod.S \ + lib/compiler-rt/builtins/udivmoddi4.c \ + lib/compiler-rt/builtins/ctzdi2.c +endif diff --git a/lib/cpus/aarch32/aem_generic.S b/lib/cpus/aarch32/aem_generic.S new file mode 100644 index 00000000..5f3d7447 --- /dev/null +++ b/lib/cpus/aarch32/aem_generic.S @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved. 
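
The closing loop of __udivmoddi4 above is classic restoring division, one quotient bit per iteration, and its conditional subtract is branchless: (di_int)(d.all - r.all - 1) >> 63 underflows to all-ones exactly when r >= d, leaning on the arithmetic-shift assumption from int_lib.h. A scaled-down 32-bit sketch of the same loop, with the carry folded into the same iteration for brevity and checked against native division; on AArch32 this routine is what the __aeabi_uldivmod wrapper listed in compiler-rt.mk ultimately calls:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t udivmod32(uint32_t n, uint32_t d, uint32_t *rem)
    {
        uint32_t q = 0, r = 0;
        for (int i = 31; i >= 0; i--) {
            r = (r << 1) | ((n >> i) & 1);          /* shift in next bit */
            /* s is all-ones iff r >= d: d - r - 1 underflows, and the
             * arithmetic right shift smears the sign bit. */
            int32_t s = (int32_t)(d - r - 1) >> 31;
            q = (q << 1) | ((uint32_t)s & 1);       /* quotient bit */
            r -= d & (uint32_t)s;                   /* subtract iff r >= d */
        }
        if (rem)
            *rem = r;
        return q;
    }

    int main(void)
    {
        uint32_t r;
        assert(udivmod32(100, 7, &r) == 14 && r == 2);
        assert(udivmod32(0xFFFFFFFFu, 3, &r) == 0x55555555u && r == 0);
        assert(udivmod32(5, 9, &r) == 0 && r == 5);
        return 0;
    }
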
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ +#include <aem_generic.h> +#include <arch.h> +#include <asm_macros.S> +#include <assert_macros.S> +#include <cpu_macros.S> + +func aem_generic_core_pwr_dwn + /* Assert if cache is enabled */ +#if ENABLE_ASSERTIONS + ldcopr r0, SCTLR + tst r0, #SCTLR_C_BIT + ASM_ASSERT(eq) +#endif + /* --------------------------------------------- + * Flush L1 cache to PoU. + * --------------------------------------------- + */ + mov r0, #DC_OP_CISW + b dcsw_op_louis +endfunc aem_generic_core_pwr_dwn + + +func aem_generic_cluster_pwr_dwn + /* Assert if cache is enabled */ +#if ENABLE_ASSERTIONS + ldcopr r0, SCTLR + tst r0, #SCTLR_C_BIT + ASM_ASSERT(eq) +#endif + /* --------------------------------------------- + * Flush L1 and L2 caches to PoC. + * --------------------------------------------- + */ + mov r0, #DC_OP_CISW + b dcsw_op_all +endfunc aem_generic_cluster_pwr_dwn + +/* cpu_ops for Base AEM FVP */ +declare_cpu_ops aem_generic, BASE_AEM_MIDR, CPU_NO_RESET_FUNC, \ + aem_generic_core_pwr_dwn, \ + aem_generic_cluster_pwr_dwn diff --git a/lib/cpus/aarch32/cortex_a32.S b/lib/cpus/aarch32/cortex_a32.S new file mode 100644 index 00000000..2b6df272 --- /dev/null +++ b/lib/cpus/aarch32/cortex_a32.S @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch.h> +#include <asm_macros.S> +#include <assert_macros.S> +#include <cortex_a32.h> +#include <cpu_macros.S> + + + /* --------------------------------------------- + * Disable intra-cluster coherency + * Clobbers: r0-r1 + * --------------------------------------------- + */ +func cortex_a32_disable_smp + ldcopr16 r0, r1, CORTEX_A32_CPUECTLR_EL1 + bic r0, r0, #CORTEX_A32_CPUECTLR_SMPEN_BIT + stcopr16 r0, r1, CORTEX_A32_CPUECTLR_EL1 + isb + dsb sy + bx lr +endfunc cortex_a32_disable_smp + + /* ------------------------------------------------- + * The CPU Ops reset function for Cortex-A32. + * Clobbers: r0-r1 + * ------------------------------------------------- + */ +func cortex_a32_reset_func + /* --------------------------------------------- + * Enable the SMP bit. + * --------------------------------------------- + */ + ldcopr16 r0, r1, CORTEX_A32_CPUECTLR_EL1 + orr r0, r0, #CORTEX_A32_CPUECTLR_SMPEN_BIT + stcopr16 r0, r1, CORTEX_A32_CPUECTLR_EL1 + isb + bx lr +endfunc cortex_a32_reset_func + + /* ---------------------------------------------------- + * The CPU Ops core power down function for Cortex-A32. + * Clobbers: r0-r3 + * ---------------------------------------------------- + */ +func cortex_a32_core_pwr_dwn + /* r12 is pushed to meet the 8 byte stack alignment requirement */ + push {r12, lr} + + /* Assert if cache is enabled */ +#if ENABLE_ASSERTIONS + ldcopr r0, SCTLR + tst r0, #SCTLR_C_BIT + ASM_ASSERT(eq) +#endif + + /* --------------------------------------------- + * Flush L1 caches. + * --------------------------------------------- + */ + mov r0, #DC_OP_CISW + bl dcsw_op_level1 + + /* --------------------------------------------- + * Come out of intra cluster coherency + * --------------------------------------------- + */ + pop {r12, lr} + b cortex_a32_disable_smp +endfunc cortex_a32_core_pwr_dwn + + /* ------------------------------------------------------- + * The CPU Ops cluster power down function for Cortex-A32. 
+ * Clobbers: r0-r3 + * ------------------------------------------------------- + */ +func cortex_a32_cluster_pwr_dwn + /* r12 is pushed to meet the 8 byte stack alignment requirement */ + push {r12, lr} + + /* Assert if cache is enabled */ +#if ENABLE_ASSERTIONS + ldcopr r0, SCTLR + tst r0, #SCTLR_C_BIT + ASM_ASSERT(eq) +#endif + + /* --------------------------------------------- + * Flush L1 cache. + * --------------------------------------------- + */ + mov r0, #DC_OP_CISW + bl dcsw_op_level1 + + /* --------------------------------------------- + * Disable the optional ACP. + * --------------------------------------------- + */ + bl plat_disable_acp + + /* --------------------------------------------- + * Flush L2 cache. + * --------------------------------------------- + */ + mov r0, #DC_OP_CISW + bl dcsw_op_level2 + + /* --------------------------------------------- + * Come out of intra cluster coherency + * --------------------------------------------- + */ + pop {r12, lr} + b cortex_a32_disable_smp +endfunc cortex_a32_cluster_pwr_dwn + +declare_cpu_ops cortex_a32, CORTEX_A32_MIDR, \ + cortex_a32_reset_func, \ + cortex_a32_core_pwr_dwn, \ + cortex_a32_cluster_pwr_dwn diff --git a/lib/cpus/aarch32/cortex_a53.S b/lib/cpus/aarch32/cortex_a53.S new file mode 100644 index 00000000..74cedc35 --- /dev/null +++ b/lib/cpus/aarch32/cortex_a53.S @@ -0,0 +1,267 @@ +/* + * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +#include <arch.h> +#include <asm_macros.S> +#include <assert_macros.S> +#include <cortex_a53.h> +#include <cpu_macros.S> +#include <debug.h> + +#if A53_DISABLE_NON_TEMPORAL_HINT +#undef ERRATA_A53_836870 +#define ERRATA_A53_836870 1 +#endif + + /* --------------------------------------------- + * Disable intra-cluster coherency + * --------------------------------------------- + */ +func cortex_a53_disable_smp + ldcopr16 r0, r1, CORTEX_A53_ECTLR + bic64_imm r0, r1, CORTEX_A53_ECTLR_SMP_BIT + stcopr16 r0, r1, CORTEX_A53_ECTLR + isb + dsb sy + bx lr +endfunc cortex_a53_disable_smp + + /* -------------------------------------------------- + * Errata Workaround for Cortex A53 Errata #826319. + * This applies only to revision <= r0p2 of Cortex A53. + * Inputs: + * r0: variant[4:7] and revision[0:3] of current cpu. + * Shall clobber: r0-r3 + * -------------------------------------------------- + */ +func errata_a53_826319_wa + /* + * Compare r0 against revision r0p2 + */ + mov r2, lr + bl check_errata_826319 + mov lr, r2 + cmp r0, #ERRATA_NOT_APPLIES + beq 1f + ldcopr r0, CORTEX_A53_L2ACTLR + bic r0, #CORTEX_A53_L2ACTLR_ENABLE_UNIQUECLEAN + orr r0, #CORTEX_A53_L2ACTLR_DISABLE_CLEAN_PUSH + stcopr r0, CORTEX_A53_L2ACTLR +1: + bx lr +endfunc errata_a53_826319_wa + +func check_errata_826319 + mov r1, #0x02 + b cpu_rev_var_ls +endfunc check_errata_826319 + + /* --------------------------------------------------------------------- + * Disable the cache non-temporal hint. + * + * This ignores the Transient allocation hint in the MAIR and treats + * allocations the same as non-transient allocation types. As a result, + * the LDNP and STNP instructions in AArch64 behave the same as the + * equivalent LDP and STP instructions. + * + * This is relevant only for revisions <= r0p3 of Cortex-A53. + * From r0p4 and onwards, the bit to disable the hint is enabled by + * default at reset. + * + * Inputs: + * r0: variant[4:7] and revision[0:3] of current cpu. 
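
The check_errata_* helpers in these CPU files encode a revision label rNpM as the byte 0xNM (variant in the high nibble, revision in the low) and tail-call cpu_rev_var_ls, which reports that an erratum applies when the running core's packed revision-variant is less than or equal to the limit. A C restatement of the convention; the helper names and constant values here are illustrative, not the real interfaces:

    #include <assert.h>

    #define ERRATA_APPLIES      1
    #define ERRATA_NOT_APPLIES  0

    /* Pack "rNpM" as 0xNM, matching what cpu_get_rev_var puts in r0[7:0]. */
    #define REV_VAR(var, rev)   (((var) << 4) | (rev))

    /* cpu_rev_var_ls: applies to the given revision and everything below. */
    static int rev_var_ls(unsigned rev_var, unsigned max)
    {
        return rev_var <= max ? ERRATA_APPLIES : ERRATA_NOT_APPLIES;
    }

    /* Mirrors check_errata_826319: r0p2 and earlier are affected. */
    static int check_errata_826319(unsigned rev_var)
    {
        return rev_var_ls(rev_var, 0x02);
    }

    int main(void)
    {
        assert(check_errata_826319(REV_VAR(0, 0)) == ERRATA_APPLIES);     /* r0p0 */
        assert(check_errata_826319(REV_VAR(0, 2)) == ERRATA_APPLIES);     /* r0p2 */
        assert(check_errata_826319(REV_VAR(0, 4)) == ERRATA_NOT_APPLIES); /* r0p4 */
        assert(check_errata_826319(REV_VAR(1, 0)) == ERRATA_NOT_APPLIES); /* r1p0 */
        return 0;
    }
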
+ * Shall clobber: r0-r3 + * --------------------------------------------------------------------- + */ +func a53_disable_non_temporal_hint + /* + * Compare r0 against revision r0p3 + */ + mov r2, lr + bl check_errata_disable_non_temporal_hint + mov lr, r2 + cmp r0, #ERRATA_NOT_APPLIES + beq 1f + ldcopr16 r0, r1, CORTEX_A53_CPUACTLR + orr64_imm r0, r1, CORTEX_A53_CPUACTLR_DTAH + stcopr16 r0, r1, CORTEX_A53_CPUACTLR +1: + bx lr +endfunc a53_disable_non_temporal_hint + +func check_errata_disable_non_temporal_hint + mov r1, #0x03 + b cpu_rev_var_ls +endfunc check_errata_disable_non_temporal_hint + + /* -------------------------------------------------- + * Errata Workaround for Cortex A53 Errata #855873. + * + * This applies only to revisions >= r0p3 of Cortex A53. + * Earlier revisions of the core are affected as well, but don't + * have the chicken bit in the CPUACTLR register. It is expected that + * the rich OS takes care of that, especially as the workaround is + * shared with other erratas in those revisions of the CPU. + * Inputs: + * r0: variant[4:7] and revision[0:3] of current cpu. + * Shall clobber: r0-r3 + * -------------------------------------------------- + */ +func errata_a53_855873_wa + /* + * Compare r0 against revision r0p3 and higher + */ + mov r2, lr + bl check_errata_855873 + mov lr, r2 + cmp r0, #ERRATA_NOT_APPLIES + beq 1f + ldcopr16 r0, r1, CORTEX_A53_CPUACTLR + orr64_imm r0, r1, CORTEX_A53_CPUACTLR_ENDCCASCI + stcopr16 r0, r1, CORTEX_A53_CPUACTLR +1: + bx lr +endfunc errata_a53_855873_wa + +func check_errata_855873 + mov r1, #0x03 + b cpu_rev_var_hs +endfunc check_errata_855873 + + /* ------------------------------------------------- + * The CPU Ops reset function for Cortex-A53. + * Shall clobber: r0-r6 + * ------------------------------------------------- + */ +func cortex_a53_reset_func + mov r5, lr + bl cpu_get_rev_var + mov r4, r0 + +#if ERRATA_A53_826319 + mov r0, r4 + bl errata_a53_826319_wa +#endif + +#if ERRATA_A53_836870 + mov r0, r4 + bl a53_disable_non_temporal_hint +#endif + +#if ERRATA_A53_855873 + mov r0, r4 + bl errata_a53_855873_wa +#endif + + /* --------------------------------------------- + * Enable the SMP bit. + * --------------------------------------------- + */ + ldcopr16 r0, r1, CORTEX_A53_ECTLR + orr64_imm r0, r1, CORTEX_A53_ECTLR_SMP_BIT + stcopr16 r0, r1, CORTEX_A53_ECTLR + isb + bx r5 +endfunc cortex_a53_reset_func + + /* ---------------------------------------------------- + * The CPU Ops core power down function for Cortex-A53. + * ---------------------------------------------------- + */ +func cortex_a53_core_pwr_dwn + push {r12, lr} + + /* Assert if cache is enabled */ +#if ASM_ASSERTION + ldcopr r0, SCTLR + tst r0, #SCTLR_C_BIT + ASM_ASSERT(eq) +#endif + + /* --------------------------------------------- + * Flush L1 caches. + * --------------------------------------------- + */ + mov r0, #DC_OP_CISW + bl dcsw_op_level1 + + /* --------------------------------------------- + * Come out of intra cluster coherency + * --------------------------------------------- + */ + pop {r12, lr} + b cortex_a53_disable_smp +endfunc cortex_a53_core_pwr_dwn + + /* ------------------------------------------------------- + * The CPU Ops cluster power down function for Cortex-A53. 
+ * Clobbers: r0-r3 + * ------------------------------------------------------- + */ +func cortex_a53_cluster_pwr_dwn + push {r12, lr} + + /* Assert if cache is enabled */ +#if ASM_ASSERTION + ldcopr r0, SCTLR + tst r0, #SCTLR_C_BIT + ASM_ASSERT(eq) +#endif + + /* --------------------------------------------- + * Flush L1 caches. + * --------------------------------------------- + */ + mov r0, #DC_OP_CISW + bl dcsw_op_level1 + + /* --------------------------------------------- + * Disable the optional ACP. + * --------------------------------------------- + */ + bl plat_disable_acp + + /* --------------------------------------------- + * Flush L2 caches. + * --------------------------------------------- + */ + mov r0, #DC_OP_CISW + bl dcsw_op_level2 + + /* --------------------------------------------- + * Come out of intra cluster coherency + * --------------------------------------------- + */ + pop {r12, lr} + b cortex_a53_disable_smp +endfunc cortex_a53_cluster_pwr_dwn + +#if REPORT_ERRATA +/* + * Errata printing function for Cortex A53. Must follow AAPCS. + */ +func cortex_a53_errata_report + push {r12, lr} + + bl cpu_get_rev_var + mov r4, r0 + + /* + * Report all errata. The revision-variant information is passed to + * checking functions of each errata. + */ + report_errata ERRATA_A53_826319, cortex_a53, 826319 + report_errata ERRATA_A53_836870, cortex_a53, disable_non_temporal_hint + report_errata ERRATA_A53_855873, cortex_a53, 855873 + + pop {r12, lr} + bx lr +endfunc cortex_a53_errata_report +#endif + +declare_cpu_ops cortex_a53, CORTEX_A53_MIDR, \ + cortex_a53_reset_func, \ + cortex_a53_core_pwr_dwn, \ + cortex_a53_cluster_pwr_dwn diff --git a/lib/cpus/aarch32/cortex_a57.S b/lib/cpus/aarch32/cortex_a57.S new file mode 100644 index 00000000..b5189e77 --- /dev/null +++ b/lib/cpus/aarch32/cortex_a57.S @@ -0,0 +1,531 @@ +/* + * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +#include <arch.h> +#include <asm_macros.S> +#include <assert_macros.S> +#include <cortex_a57.h> +#include <cpu_macros.S> +#include <debug.h> + + /* --------------------------------------------- + * Disable intra-cluster coherency + * Clobbers: r0-r1 + * --------------------------------------------- + */ +func cortex_a57_disable_smp + ldcopr16 r0, r1, CORTEX_A57_ECTLR + bic64_imm r0, r1, CORTEX_A57_ECTLR_SMP_BIT + stcopr16 r0, r1, CORTEX_A57_ECTLR + bx lr +endfunc cortex_a57_disable_smp + + /* --------------------------------------------- + * Disable all types of L2 prefetches. + * Clobbers: r0-r2 + * --------------------------------------------- + */ +func cortex_a57_disable_l2_prefetch + ldcopr16 r0, r1, CORTEX_A57_ECTLR + orr64_imm r0, r1, CORTEX_A57_ECTLR_DIS_TWD_ACC_PFTCH_BIT + bic64_imm r0, r1, (CORTEX_A57_ECTLR_L2_IPFTCH_DIST_MASK | \ + CORTEX_A57_ECTLR_L2_DPFTCH_DIST_MASK) + stcopr16 r0, r1, CORTEX_A57_ECTLR + isb + dsb ish + bx lr +endfunc cortex_a57_disable_l2_prefetch + + /* --------------------------------------------- + * Disable debug interfaces + * --------------------------------------------- + */ +func cortex_a57_disable_ext_debug + mov r0, #1 + stcopr r0, DBGOSDLR + isb + dsb sy + bx lr +endfunc cortex_a57_disable_ext_debug + + /* -------------------------------------------------- + * Errata Workaround for Cortex A57 Errata #806969. + * This applies only to revision r0p0 of Cortex A57. + * Inputs: + * r0: variant[4:7] and revision[0:3] of current cpu. 
+ * Shall clobber: r0-r3 + * -------------------------------------------------- + */ +func errata_a57_806969_wa + /* + * Compare r0 against revision r0p0 + */ + mov r2, lr + bl check_errata_806969 + mov lr, r2 + cmp r0, #ERRATA_NOT_APPLIES + beq 1f + ldcopr16 r0, r1, CORTEX_A57_CPUACTLR + orr64_imm r0, r1, CORTEX_A57_CPUACTLR_NO_ALLOC_WBWA + stcopr16 r0, r1, CORTEX_A57_CPUACTLR +1: + bx lr +endfunc errata_a57_806969_wa + +func check_errata_806969 + mov r1, #0x00 + b cpu_rev_var_ls +endfunc check_errata_806969 + + /* --------------------------------------------------- + * Errata Workaround for Cortex A57 Errata #813419. + * This applies only to revision r0p0 of Cortex A57. + * --------------------------------------------------- + */ +func check_errata_813419 + /* + * Even though this is only needed for revision r0p0, it + * is always applied due to limitations of the current + * errata framework. + */ + mov r0, #ERRATA_APPLIES + bx lr +endfunc check_errata_813419 + + /* --------------------------------------------------- + * Errata Workaround for Cortex A57 Errata #813420. + * This applies only to revision r0p0 of Cortex A57. + * Inputs: + * r0: variant[4:7] and revision[0:3] of current cpu. + * Shall clobber: r0-r3 + * --------------------------------------------------- + */ +func errata_a57_813420_wa + /* + * Compare r0 against revision r0p0 + */ + mov r2, lr + bl check_errata_813420 + mov lr, r2 + cmp r0, #ERRATA_NOT_APPLIES + beq 1f + ldcopr16 r0, r1, CORTEX_A57_CPUACTLR + orr64_imm r0, r1, CORTEX_A57_CPUACTLR_DCC_AS_DCCI + stcopr16 r0, r1, CORTEX_A57_CPUACTLR +1: + bx lr +endfunc errata_a57_813420_wa + +func check_errata_813420 + mov r1, #0x00 + b cpu_rev_var_ls +endfunc check_errata_813420 + + /* -------------------------------------------------------------------- + * Disable the over-read from the LDNP instruction. + * + * This applies to all revisions <= r1p2. The performance degradation + * observed with LDNP/STNP has been fixed on r1p3 and onwards. + * + * Inputs: + * r0: variant[4:7] and revision[0:3] of current cpu. + * Shall clobber: r0-r3 + * --------------------------------------------------------------------- + */ +func a57_disable_ldnp_overread + /* + * Compare r0 against revision r1p2 + */ + mov r2, lr + bl check_errata_disable_ldnp_overread + mov lr, r2 + cmp r0, #ERRATA_NOT_APPLIES + beq 1f + ldcopr16 r0, r1, CORTEX_A57_CPUACTLR + orr64_imm r0, r1, CORTEX_A57_CPUACTLR_DIS_OVERREAD + stcopr16 r0, r1, CORTEX_A57_CPUACTLR +1: + bx lr +endfunc a57_disable_ldnp_overread + +func check_errata_disable_ldnp_overread + mov r1, #0x12 + b cpu_rev_var_ls +endfunc check_errata_disable_ldnp_overread + + /* --------------------------------------------------- + * Errata Workaround for Cortex A57 Errata #826974. + * This applies only to revision <= r1p1 of Cortex A57. + * Inputs: + * r0: variant[4:7] and revision[0:3] of current cpu. + * Shall clobber: r0-r3 + * --------------------------------------------------- + */ +func errata_a57_826974_wa + /* + * Compare r0 against revision r1p1 + */ + mov r2, lr + bl check_errata_826974 + mov lr, r2 + cmp r0, #ERRATA_NOT_APPLIES + beq 1f + ldcopr16 r0, r1, CORTEX_A57_CPUACTLR + orr64_imm r0, r1, CORTEX_A57_CPUACTLR_DIS_LOAD_PASS_DMB + stcopr16 r0, r1, CORTEX_A57_CPUACTLR +1: + bx lr +endfunc errata_a57_826974_wa + +func check_errata_826974 + mov r1, #0x11 + b cpu_rev_var_ls +endfunc check_errata_826974 + + /* --------------------------------------------------- + * Errata Workaround for Cortex A57 Errata #826977. 
+ * This applies only to revision <= r1p1 of Cortex A57. + * Inputs: + * r0: variant[4:7] and revision[0:3] of current cpu. + * Shall clobber: r0-r3 + * --------------------------------------------------- + */ +func errata_a57_826977_wa + /* + * Compare r0 against revision r1p1 + */ + mov r2, lr + bl check_errata_826977 + mov lr, r2 + cmp r0, #ERRATA_NOT_APPLIES + beq 1f + ldcopr16 r0, r1, CORTEX_A57_CPUACTLR + orr64_imm r0, r1, CORTEX_A57_CPUACTLR_GRE_NGRE_AS_NGNRE + stcopr16 r0, r1, CORTEX_A57_CPUACTLR +1: + bx lr +endfunc errata_a57_826977_wa + +func check_errata_826977 + mov r1, #0x11 + b cpu_rev_var_ls +endfunc check_errata_826977 + + /* --------------------------------------------------- + * Errata Workaround for Cortex A57 Errata #828024. + * This applies only to revision <= r1p1 of Cortex A57. + * Inputs: + * r0: variant[4:7] and revision[0:3] of current cpu. + * Shall clobber: r0-r3 + * --------------------------------------------------- + */ +func errata_a57_828024_wa + /* + * Compare r0 against revision r1p1 + */ + mov r2, lr + bl check_errata_828024 + mov lr, r2 + cmp r0, #ERRATA_NOT_APPLIES + beq 1f + ldcopr16 r0, r1, CORTEX_A57_CPUACTLR + /* + * Setting the relevant bits in CORTEX_A57_CPUACTLR has to be done in 2 + * instructions here because the resulting bitmask doesn't fit in a + * 16-bit value so it cannot be encoded in a single instruction. + */ + orr64_imm r0, r1, CORTEX_A57_CPUACTLR_NO_ALLOC_WBWA + orr64_imm r0, r1, (CORTEX_A57_CPUACTLR_DIS_L1_STREAMING | CORTEX_A57_CPUACTLR_DIS_STREAMING) + stcopr16 r0, r1, CORTEX_A57_CPUACTLR +1: + bx lr +endfunc errata_a57_828024_wa + +func check_errata_828024 + mov r1, #0x11 + b cpu_rev_var_ls +endfunc check_errata_828024 + + /* --------------------------------------------------- + * Errata Workaround for Cortex A57 Errata #829520. + * This applies only to revision <= r1p2 of Cortex A57. + * Inputs: + * r0: variant[4:7] and revision[0:3] of current cpu. + * Shall clobber: r0-r3 + * --------------------------------------------------- + */ +func errata_a57_829520_wa + /* + * Compare r0 against revision r1p2 + */ + mov r2, lr + bl check_errata_829520 + mov lr, r2 + cmp r0, #ERRATA_NOT_APPLIES + beq 1f + ldcopr16 r0, r1, CORTEX_A57_CPUACTLR + orr64_imm r0, r1, CORTEX_A57_CPUACTLR_DIS_INDIRECT_PREDICTOR + stcopr16 r0, r1, CORTEX_A57_CPUACTLR +1: + bx lr +endfunc errata_a57_829520_wa + +func check_errata_829520 + mov r1, #0x12 + b cpu_rev_var_ls +endfunc check_errata_829520 + + /* --------------------------------------------------- + * Errata Workaround for Cortex A57 Errata #833471. + * This applies only to revision <= r1p2 of Cortex A57. + * Inputs: + * r0: variant[4:7] and revision[0:3] of current cpu. + * Shall clobber: r0-r3 + * --------------------------------------------------- + */ +func errata_a57_833471_wa + /* + * Compare r0 against revision r1p2 + */ + mov r2, lr + bl check_errata_833471 + mov lr, r2 + cmp r0, #ERRATA_NOT_APPLIES + beq 1f + ldcopr16 r0, r1, CORTEX_A57_CPUACTLR + orr64_imm r1, r1, CORTEX_A57_CPUACTLR_FORCE_FPSCR_FLUSH + stcopr16 r0, r1, CORTEX_A57_CPUACTLR +1: + bx lr +endfunc errata_a57_833471_wa + +func check_errata_833471 + mov r1, #0x12 + b cpu_rev_var_ls +endfunc check_errata_833471 + + /* --------------------------------------------------- + * Errata Workaround for Cortex A57 Errata #859972. + * This applies only to revision <= r1p3 of Cortex A57. + * Inputs: + * r0: variant[4:7] and revision[0:3] of current cpu. 
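
On the two-instruction ORR in the 828024 workaround above: orr64_imm expands to A32 ORR instructions with immediate operands, and an A32 data-processing immediate must be an 8-bit value rotated right by an even amount, which the combined workaround mask evidently cannot satisfy in one go. A small checker for that encodability rule, offered as a sketch of the constraint rather than anything the firmware itself uses:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t rotl(uint32_t v, unsigned n)
    {
        n &= 31;
        return n ? (v << n) | (v >> (32 - n)) : v;
    }

    /* True if v is an A32 "modified immediate": some even rotate-left
     * brings all of its set bits into the low 8 bits. */
    static int is_a32_imm(uint32_t v)
    {
        for (unsigned rot = 0; rot < 32; rot += 2)
            if (rotl(v, rot) <= 0xFFu)
                return 1;
        return 0;
    }

    int main(void)
    {
        assert(is_a32_imm(0xFF));        /* plain 8-bit value */
        assert(is_a32_imm(0x3FC00));     /* 0xFF << 10: even rotation */
        assert(is_a32_imm(0xF000000F));  /* 8-bit pattern wrapping the ends */
        assert(!is_a32_imm(0x1FE));      /* 0xFF << 1 needs an odd rotation */
        assert(!is_a32_imm(0x101));      /* set bits span more than 8 bits */
        return 0;
    }
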
+ * Shall clobber: r0-r3 + * --------------------------------------------------- + */ +func errata_a57_859972_wa + mov r2, lr + bl check_errata_859972 + mov lr, r2 + cmp r0, #ERRATA_NOT_APPLIES + beq 1f + ldcopr16 r0, r1, CORTEX_A57_CPUACTLR + orr64_imm r1, r1, CORTEX_A57_CPUACTLR_DIS_INSTR_PREFETCH + stcopr16 r0, r1, CORTEX_A57_CPUACTLR +1: + bx lr +endfunc errata_a57_859972_wa + +func check_errata_859972 + mov r1, #0x13 + b cpu_rev_var_ls +endfunc check_errata_859972 + + /* ------------------------------------------------- + * The CPU Ops reset function for Cortex-A57. + * Shall clobber: r0-r6 + * ------------------------------------------------- + */ +func cortex_a57_reset_func + mov r5, lr + bl cpu_get_rev_var + mov r4, r0 + +#if ERRATA_A57_806969 + mov r0, r4 + bl errata_a57_806969_wa +#endif + +#if ERRATA_A57_813420 + mov r0, r4 + bl errata_a57_813420_wa +#endif + +#if A57_DISABLE_NON_TEMPORAL_HINT + mov r0, r4 + bl a57_disable_ldnp_overread +#endif + +#if ERRATA_A57_826974 + mov r0, r4 + bl errata_a57_826974_wa +#endif + +#if ERRATA_A57_826977 + mov r0, r4 + bl errata_a57_826977_wa +#endif + +#if ERRATA_A57_828024 + mov r0, r4 + bl errata_a57_828024_wa +#endif + +#if ERRATA_A57_829520 + mov r0, r4 + bl errata_a57_829520_wa +#endif + +#if ERRATA_A57_833471 + mov r0, r4 + bl errata_a57_833471_wa +#endif + +#if ERRATA_A57_859972 + mov r0, r4 + bl errata_a57_859972_wa +#endif + + /* --------------------------------------------- + * Enable the SMP bit. + * --------------------------------------------- + */ + ldcopr16 r0, r1, CORTEX_A57_ECTLR + orr64_imm r0, r1, CORTEX_A57_ECTLR_SMP_BIT + stcopr16 r0, r1, CORTEX_A57_ECTLR + isb + bx r5 +endfunc cortex_a57_reset_func + + /* ---------------------------------------------------- + * The CPU Ops core power down function for Cortex-A57. + * ---------------------------------------------------- + */ +func cortex_a57_core_pwr_dwn + push {r12, lr} + + /* Assert if cache is enabled */ +#if ASM_ASSERTION + ldcopr r0, SCTLR + tst r0, #SCTLR_C_BIT + ASM_ASSERT(eq) +#endif + + /* --------------------------------------------- + * Disable the L2 prefetches. + * --------------------------------------------- + */ + bl cortex_a57_disable_l2_prefetch + + /* --------------------------------------------- + * Flush L1 caches. + * --------------------------------------------- + */ + mov r0, #DC_OP_CISW + bl dcsw_op_level1 + + /* --------------------------------------------- + * Come out of intra cluster coherency + * --------------------------------------------- + */ + bl cortex_a57_disable_smp + + /* --------------------------------------------- + * Force the debug interfaces to be quiescent + * --------------------------------------------- + */ + pop {r12, lr} + b cortex_a57_disable_ext_debug +endfunc cortex_a57_core_pwr_dwn + + /* ------------------------------------------------------- + * The CPU Ops cluster power down function for Cortex-A57. + * Clobbers: r0-r3 + * ------------------------------------------------------- + */ +func cortex_a57_cluster_pwr_dwn + push {r12, lr} + + /* Assert if cache is enabled */ +#if ASM_ASSERTION + ldcopr r0, SCTLR + tst r0, #SCTLR_C_BIT + ASM_ASSERT(eq) +#endif + + /* --------------------------------------------- + * Disable the L2 prefetches. + * --------------------------------------------- + */ + bl cortex_a57_disable_l2_prefetch + + /* --------------------------------------------- + * Flush L1 caches. 
+ * --------------------------------------------- + */ + mov r0, #DC_OP_CISW + bl dcsw_op_level1 + + /* --------------------------------------------- + * Disable the optional ACP. + * --------------------------------------------- + */ + bl plat_disable_acp + + /* --------------------------------------------- + * Flush L2 caches. + * --------------------------------------------- + */ + mov r0, #DC_OP_CISW + bl dcsw_op_level2 + + /* --------------------------------------------- + * Come out of intra cluster coherency + * --------------------------------------------- + */ + bl cortex_a57_disable_smp + + /* --------------------------------------------- + * Force the debug interfaces to be quiescent + * --------------------------------------------- + */ + pop {r12, lr} + b cortex_a57_disable_ext_debug +endfunc cortex_a57_cluster_pwr_dwn + +#if REPORT_ERRATA +/* + * Errata printing function for Cortex A57. Must follow AAPCS. + */ +func cortex_a57_errata_report + push {r12, lr} + + bl cpu_get_rev_var + mov r4, r0 + + /* + * Report all errata. The revision-variant information is passed to + * checking functions of each errata. + */ + report_errata ERRATA_A57_806969, cortex_a57, 806969 + report_errata ERRATA_A57_813419, cortex_a57, 813419 + report_errata ERRATA_A57_813420, cortex_a57, 813420 + report_errata A57_DISABLE_NON_TEMPORAL_HINT, cortex_a57, \ + disable_ldnp_overread + report_errata ERRATA_A57_826974, cortex_a57, 826974 + report_errata ERRATA_A57_826977, cortex_a57, 826977 + report_errata ERRATA_A57_828024, cortex_a57, 828024 + report_errata ERRATA_A57_829520, cortex_a57, 829520 + report_errata ERRATA_A57_833471, cortex_a57, 833471 + report_errata ERRATA_A57_859972, cortex_a57, 859972 + + pop {r12, lr} + bx lr +endfunc cortex_a57_errata_report +#endif + +declare_cpu_ops cortex_a57, CORTEX_A57_MIDR, \ + cortex_a57_reset_func, \ + cortex_a57_core_pwr_dwn, \ + cortex_a57_cluster_pwr_dwn diff --git a/lib/cpus/aarch32/cortex_a72.S b/lib/cpus/aarch32/cortex_a72.S new file mode 100644 index 00000000..69cc2ea5 --- /dev/null +++ b/lib/cpus/aarch32/cortex_a72.S @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +#include <arch.h> +#include <asm_macros.S> +#include <assert_macros.S> +#include <cortex_a72.h> +#include <cpu_macros.S> +#include <debug.h> + + /* --------------------------------------------- + * Disable all types of L2 prefetches. + * --------------------------------------------- + */ +func cortex_a72_disable_l2_prefetch + ldcopr16 r0, r1, CORTEX_A72_ECTLR + orr64_imm r0, r1, CORTEX_A72_ECTLR_DIS_TWD_ACC_PFTCH_BIT + bic64_imm r0, r1, (CORTEX_A72_ECTLR_L2_IPFTCH_DIST_MASK | \ + CORTEX_A72_ECTLR_L2_DPFTCH_DIST_MASK) + stcopr16 r0, r1, CORTEX_A72_ECTLR + isb + bx lr +endfunc cortex_a72_disable_l2_prefetch + + /* --------------------------------------------- + * Disable the load-store hardware prefetcher. 
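
cortex_a57_errata_report above (and its A53/A72 counterparts) runs on every core, but the framework prints each CPU class's errata status only once: errata_needs_reporting consults a per-class lock and "printed" flag held alongside the cpu_ops entry. A minimal C sketch of that gate, with a plain int standing in for the spinlock; the real signature and locking live in the errata-reporting framework, so treat the names here as assumptions:

    #include <assert.h>

    struct errata_class {
        int lock;       /* placeholder for spinlock_t */
        int printed;
    };

    /* Returns nonzero for exactly the first caller of a class. */
    static int errata_needs_reporting(struct errata_class *c)
    {
        /* spin_lock(&c->lock) in the real code */
        int report = !c->printed;
        c->printed = 1;
        /* spin_unlock(&c->lock) */
        return report;
    }

    int main(void)
    {
        struct errata_class a57 = { 0, 0 };
        assert(errata_needs_reporting(&a57) == 1);  /* first core reports */
        assert(errata_needs_reporting(&a57) == 0);  /* later cores stay quiet */
        return 0;
    }
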
+ * ---------------------------------------------
+ */
+func cortex_a72_disable_hw_prefetcher
+ ldcopr16 r0, r1, CORTEX_A72_CPUACTLR
+ orr64_imm r0, r1, CORTEX_A72_CPUACTLR_DISABLE_L1_DCACHE_HW_PFTCH
+ stcopr16 r0, r1, CORTEX_A72_CPUACTLR
+ isb
+ dsb ish
+ bx lr
+endfunc cortex_a72_disable_hw_prefetcher
+
+ /* ---------------------------------------------
+ * Disable intra-cluster coherency
+ * Clobbers: r0-r1
+ * ---------------------------------------------
+ */
+func cortex_a72_disable_smp
+ ldcopr16 r0, r1, CORTEX_A72_ECTLR
+ bic64_imm r0, r1, CORTEX_A72_ECTLR_SMP_BIT
+ stcopr16 r0, r1, CORTEX_A72_ECTLR
+ bx lr
+endfunc cortex_a72_disable_smp
+
+ /* ---------------------------------------------
+ * Disable debug interfaces
+ * ---------------------------------------------
+ */
+func cortex_a72_disable_ext_debug
+ mov r0, #1
+ stcopr r0, DBGOSDLR
+ isb
+ dsb sy
+ bx lr
+endfunc cortex_a72_disable_ext_debug
+
+ /* ---------------------------------------------------
+ * Errata Workaround for Cortex A72 Errata #859971.
+ * This applies only to revision <= r0p3 of Cortex A72.
+ * Inputs:
+ * r0: variant[4:7] and revision[0:3] of current cpu.
+ * Shall clobber: r0-r3
+ * ---------------------------------------------------
+ */
+func errata_a72_859971_wa
+ mov r2, lr
+ bl check_errata_859971
+ mov lr, r2
+ cmp r0, #ERRATA_NOT_APPLIES
+ beq 1f
+ ldcopr16 r0, r1, CORTEX_A72_CPUACTLR
+ orr64_imm r1, r1, CORTEX_A72_CPUACTLR_DIS_INSTR_PREFETCH
+ stcopr16 r0, r1, CORTEX_A72_CPUACTLR
+1:
+ bx lr
+endfunc errata_a72_859971_wa
+
+func check_errata_859971
+ mov r1, #0x03
+ b cpu_rev_var_ls
+endfunc check_errata_859971
+
+
+ /* -------------------------------------------------
+ * The CPU Ops reset function for Cortex-A72.
+ * -------------------------------------------------
+ */
+func cortex_a72_reset_func
+ mov r5, lr
+ bl cpu_get_rev_var
+ mov r4, r0
+
+#if ERRATA_A72_859971
+ mov r0, r4
+ bl errata_a72_859971_wa
+#endif
+ /* ---------------------------------------------
+ * Enable the SMP bit.
+ * ---------------------------------------------
+ */
+ ldcopr16 r0, r1, CORTEX_A72_ECTLR
+ orr64_imm r0, r1, CORTEX_A72_ECTLR_SMP_BIT
+ stcopr16 r0, r1, CORTEX_A72_ECTLR
+ isb
+ bx r5
+endfunc cortex_a72_reset_func
+
+ /* ----------------------------------------------------
+ * The CPU Ops core power down function for Cortex-A72.
+ * ----------------------------------------------------
+ */
+func cortex_a72_core_pwr_dwn
+ push {r12, lr}
+
+ /* Assert if cache is enabled */
+#if ASM_ASSERTION
+ ldcopr r0, SCTLR
+ tst r0, #SCTLR_C_BIT
+ ASM_ASSERT(eq)
+#endif
+
+ /* ---------------------------------------------
+ * Disable the L2 prefetches.
+ * ---------------------------------------------
+ */
+ bl cortex_a72_disable_l2_prefetch
+
+ /* ---------------------------------------------
+ * Disable the load-store hardware prefetcher.
+ * ---------------------------------------------
+ */
+ bl cortex_a72_disable_hw_prefetcher
+
+ /* ---------------------------------------------
+ * Flush L1 caches.
+ * --------------------------------------------- + */ + mov r0, #DC_OP_CISW + bl dcsw_op_level1 + + /* --------------------------------------------- + * Come out of intra cluster coherency + * --------------------------------------------- + */ + bl cortex_a72_disable_smp + + /* --------------------------------------------- + * Force the debug interfaces to be quiescent + * --------------------------------------------- + */ + pop {r12, lr} + b cortex_a72_disable_ext_debug +endfunc cortex_a72_core_pwr_dwn + + /* ------------------------------------------------------- + * The CPU Ops cluster power down function for Cortex-A72. + * ------------------------------------------------------- + */ +func cortex_a72_cluster_pwr_dwn + push {r12, lr} + + /* Assert if cache is enabled */ +#if ASM_ASSERTION + ldcopr r0, SCTLR + tst r0, #SCTLR_C_BIT + ASM_ASSERT(eq) +#endif + + /* --------------------------------------------- + * Disable the L2 prefetches. + * --------------------------------------------- + */ + bl cortex_a72_disable_l2_prefetch + + /* --------------------------------------------- + * Disable the load-store hardware prefetcher. + * --------------------------------------------- + */ + bl cortex_a72_disable_hw_prefetcher + +#if !SKIP_A72_L1_FLUSH_PWR_DWN + /* --------------------------------------------- + * Flush L1 caches. + * --------------------------------------------- + */ + mov r0, #DC_OP_CISW + bl dcsw_op_level1 +#endif + + /* --------------------------------------------- + * Disable the optional ACP. + * --------------------------------------------- + */ + bl plat_disable_acp + + /* ------------------------------------------------- + * Flush the L2 caches. + * ------------------------------------------------- + */ + mov r0, #DC_OP_CISW + bl dcsw_op_level2 + + /* --------------------------------------------- + * Come out of intra cluster coherency + * --------------------------------------------- + */ + bl cortex_a72_disable_smp + + /* --------------------------------------------- + * Force the debug interfaces to be quiescent + * --------------------------------------------- + */ + pop {r12, lr} + b cortex_a72_disable_ext_debug +endfunc cortex_a72_cluster_pwr_dwn + +#if REPORT_ERRATA +/* + * Errata printing function for Cortex A72. Must follow AAPCS. + */ +func cortex_a72_errata_report + push {r12, lr} + + bl cpu_get_rev_var + mov r4, r0 + + /* + * Report all errata. The revision-variant information is passed to + * checking functions of each errata. + */ + report_errata ERRATA_A72_859971, cortex_a72, 859971 + + pop {r12, lr} + bx lr +endfunc cortex_a72_errata_report +#endif + +declare_cpu_ops cortex_a72, CORTEX_A72_MIDR, \ + cortex_a72_reset_func, \ + cortex_a72_core_pwr_dwn, \ + cortex_a72_cluster_pwr_dwn diff --git a/lib/cpus/aarch32/cpu_helpers.S b/lib/cpus/aarch32/cpu_helpers.S new file mode 100644 index 00000000..bfdc1e4f --- /dev/null +++ b/lib/cpus/aarch32/cpu_helpers.S @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch.h> +#include <asm_macros.S> +#include <assert_macros.S> +#include <cpu_data.h> +#include <cpu_macros.S> + +#if defined(IMAGE_BL1) || defined(IMAGE_BL32) + /* + * The reset handler common to all platforms. After a matching + * cpu_ops structure entry is found, the correponding reset_handler + * in the cpu_ops is invoked. 
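
A C sketch of the dispatch that reset_handler and get_cpu_ops_ptr implement below: scan the linker-emitted cpu_ops table for an entry whose MIDR matches the running core on implementer and part number only (revision and variant are masked off), then invoke the optional reset hook. The struct layout and the table in main are illustrative, not the real cpu_ops layout:

    #include <stddef.h>
    #include <stdint.h>

    #define CPU_IMPL_PN_MASK  0xFF00FFF0u  /* implementer[31:24], part[15:4] */

    struct cpu_ops {
        uint32_t midr;
        void (*reset_func)(void);   /* may be NULL (CPU_NO_RESET_FUNC) */
        void (*pwr_dwn[2])(void);   /* core, cluster */
    };

    static const struct cpu_ops *get_cpu_ops_ptr(const struct cpu_ops *tbl,
                                                 size_t n, uint32_t midr)
    {
        for (size_t i = 0; i < n; i++)
            if ((tbl[i].midr & CPU_IMPL_PN_MASK) == (midr & CPU_IMPL_PN_MASK))
                return &tbl[i];
        return NULL;                /* the r0 == 0 failure case */
    }

    static void reset_handler(const struct cpu_ops *tbl, size_t n,
                              uint32_t midr)
    {
        const struct cpu_ops *ops = get_cpu_ops_ptr(tbl, n, midr);
        if (ops && ops->reset_func)
            ops->reset_func();
    }

    static void a72_reset(void) { /* e.g. set the SMP bit */ }

    int main(void)
    {
        const struct cpu_ops table[] = {
            { 0x410FD080u, a72_reset, { 0, 0 } },  /* illustrative A72 MIDR */
        };
        reset_handler(table, 1, 0x410FD083u);  /* rev bits ignored by mask */
        return 0;
    }
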
The reset handler is invoked very early + * in the boot sequence and it is assumed that we can clobber r0 - r10 + * without the need to follow AAPCS. + * Clobbers: r0 - r10 + */ + .globl reset_handler +func reset_handler + mov r10, lr + + /* The plat_reset_handler can clobber r0 - r9 */ + bl plat_reset_handler + + /* Get the matching cpu_ops pointer (clobbers: r0 - r5) */ + bl get_cpu_ops_ptr + +#if ENABLE_ASSERTIONS + cmp r0, #0 + ASM_ASSERT(ne) +#endif + + /* Get the cpu_ops reset handler */ + ldr r1, [r0, #CPU_RESET_FUNC] + cmp r1, #0 + mov lr, r10 + bxne r1 + bx lr +endfunc reset_handler + +#endif /* IMAGE_BL1 || IMAGE_BL32 */ + +#ifdef IMAGE_BL32 /* The power down core and cluster is needed only in BL32 */ + /* + * void prepare_cpu_pwr_dwn(unsigned int power_level) + * + * Prepare CPU power down function for all platforms. The function takes + * a domain level to be powered down as its parameter. After the cpu_ops + * pointer is retrieved from cpu_data, the handler for requested power + * level is called. + */ + .globl prepare_cpu_pwr_dwn +func prepare_cpu_pwr_dwn + /* + * If the given power level exceeds CPU_MAX_PWR_DWN_OPS, we call the + * power down handler for the last power level + */ + mov r2, #(CPU_MAX_PWR_DWN_OPS - 1) + cmp r0, r2 + movhi r0, r2 + + push {r0, lr} + bl _cpu_data + pop {r2, lr} + + ldr r0, [r0, #CPU_DATA_CPU_OPS_PTR] +#if ENABLE_ASSERTIONS + cmp r0, #0 + ASM_ASSERT(ne) +#endif + + /* Get the appropriate power down handler */ + mov r1, #CPU_PWR_DWN_OPS + add r1, r1, r2, lsl #2 + ldr r1, [r0, r1] + bx r1 +endfunc prepare_cpu_pwr_dwn + + /* + * Initializes the cpu_ops_ptr if not already initialized + * in cpu_data. This must only be called after the data cache + * is enabled. AAPCS is followed. + */ + .globl init_cpu_ops +func init_cpu_ops + push {r4 - r6, lr} + bl _cpu_data + mov r6, r0 + ldr r1, [r0, #CPU_DATA_CPU_OPS_PTR] + cmp r1, #0 + bne 1f + bl get_cpu_ops_ptr +#if ENABLE_ASSERTIONS + cmp r0, #0 + ASM_ASSERT(ne) +#endif + str r0, [r6, #CPU_DATA_CPU_OPS_PTR]! +1: + pop {r4 - r6, pc} +endfunc init_cpu_ops + +#endif /* IMAGE_BL32 */ + + /* + * The below function returns the cpu_ops structure matching the + * midr of the core. It reads the MIDR and finds the matching + * entry in cpu_ops entries. Only the implementation and part number + * are used to match the entries. + * Return : + * r0 - The matching cpu_ops pointer on Success + * r0 - 0 on failure. + * Clobbers: r0 - r5 + */ + .globl get_cpu_ops_ptr +func get_cpu_ops_ptr + /* Get the cpu_ops start and end locations */ + ldr r4, =(__CPU_OPS_START__ + CPU_MIDR) + ldr r5, =(__CPU_OPS_END__ + CPU_MIDR) + + /* Initialize the return parameter */ + mov r0, #0 + + /* Read the MIDR_EL1 */ + ldcopr r2, MIDR + ldr r3, =CPU_IMPL_PN_MASK + + /* Retain only the implementation and part number using mask */ + and r2, r2, r3 +1: + /* Check if we have reached end of list */ + cmp r4, r5 + bhs error_exit + + /* load the midr from the cpu_ops */ + ldr r1, [r4], #CPU_OPS_SIZE + and r1, r1, r3 + + /* Check if midr matches to midr of this core */ + cmp r1, r2 + bne 1b + + /* Subtract the increment and offset to get the cpu-ops pointer */ + sub r0, r4, #(CPU_OPS_SIZE + CPU_MIDR) +error_exit: + bx lr +endfunc get_cpu_ops_ptr + +/* + * Extract CPU revision and variant, and combine them into a single numeric for + * easier comparison. 
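
In C terms, cpu_get_rev_var below packs MIDR variant[23:20] and revision[3:0] into one byte, variant in the high nibble and revision in the low, so the errata checks can compare revisions with a single unsigned comparison. A sketch with an illustrative MIDR value:

    #include <assert.h>
    #include <stdint.h>

    /* C equivalent of cpu_get_rev_var: r0[7:4] = variant, r0[3:0] = rev. */
    static uint32_t cpu_get_rev_var(uint32_t midr)
    {
        uint32_t var = (midr >> 20) & 0xF;
        uint32_t rev = midr & 0xF;
        return (var << 4) | rev;
    }

    int main(void)
    {
        /* A hypothetical r1p2 part: variant 1, revision 2. */
        uint32_t midr = (0x41u << 24) | (1u << 20) | (0xD07u << 4) | 2u;
        assert(cpu_get_rev_var(midr) == 0x12);
        return 0;
    }
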
+ */
+ .globl cpu_get_rev_var
+func cpu_get_rev_var
+ ldcopr r1, MIDR
+
+ /*
+ * Extract the variant[23:20] and revision[3:0] from r1 and pack it in
+ * r0[0:7] as variant[7:4] and revision[3:0]:
+ *
+ * First extract r1[23:16] to r0[7:0] and zero fill the rest. Then
+ * extract r1[3:0] into r0[3:0] retaining other bits.
+ */
+ ubfx r0, r1, #(MIDR_VAR_SHIFT - MIDR_REV_BITS), #(MIDR_REV_BITS + MIDR_VAR_BITS)
+ bfi r0, r1, #MIDR_REV_SHIFT, #MIDR_REV_BITS
+ bx lr
+endfunc cpu_get_rev_var
+
+/*
+ * Compare the CPU's revision-variant (r0) with a given value (r1), for errata
+ * application purposes. If the revision-variant is less than or the same as
+ * the given value, the erratum applies; otherwise it does not.
+ */
+ .globl cpu_rev_var_ls
+func cpu_rev_var_ls
+ cmp r0, r1
+ movls r0, #ERRATA_APPLIES
+ movhi r0, #ERRATA_NOT_APPLIES
+ bx lr
+endfunc cpu_rev_var_ls
+
+/*
+ * Compare the CPU's revision-variant (r0) with a given value (r1), for errata
+ * application purposes. If the revision-variant is higher than or the same as
+ * the given value, the erratum applies; otherwise it does not.
+ */
+ .globl cpu_rev_var_hs
+func cpu_rev_var_hs
+ cmp r0, r1
+ movge r0, #ERRATA_APPLIES
+ movlt r0, #ERRATA_NOT_APPLIES
+ bx lr
+endfunc cpu_rev_var_hs
+
+#if REPORT_ERRATA
+/*
+ * void print_errata_status(void);
+ *
+ * Function to print errata status for CPUs of its class. Must be called only:
+ *
+ * - with the MMU and data caches enabled;
+ * - after cpu_ops have been initialized in per-CPU data.
+ */
+ .globl print_errata_status
+func print_errata_status
+ push {r4, lr}
+#ifdef IMAGE_BL1
+ /*
+ * BL1 doesn't have per-CPU data. So retrieve the CPU operations
+ * directly.
+ */
+ bl get_cpu_ops_ptr
+ ldr r0, [r0, #CPU_ERRATA_FUNC]
+ cmp r0, #0
+ blxne r0
+#else
+ /*
+ * Retrieve pointer to cpu_ops, and further, the errata printing
+ * function. If it's non-NULL, jump to the function in turn.
+ */
+ bl _cpu_data
+ ldr r1, [r0, #CPU_DATA_CPU_OPS_PTR]
+ ldr r0, [r1, #CPU_ERRATA_FUNC]
+ cmp r0, #0
+ beq 1f
+
+ mov r4, r0
+
+ /*
+ * Load pointers to errata lock and printed flag. Call
+ * errata_needs_reporting to check whether this CPU needs to report
+ * errata status pertaining to its class.
+ */
+ ldr r0, [r1, #CPU_ERRATA_LOCK]
+ ldr r1, [r1, #CPU_ERRATA_PRINTED]
+ bl errata_needs_reporting
+ cmp r0, #0
+ blxne r4
+1:
+#endif
+ pop {r4, pc}
+endfunc print_errata_status
+#endif
diff --git a/lib/cpus/aarch64/aem_generic.S b/lib/cpus/aarch64/aem_generic.S
index 58a64a6c..7592e3dc 100644
--- a/lib/cpus/aarch64/aem_generic.S
+++ b/lib/cpus/aarch64/aem_generic.S
@@ -1,31 +1,7 @@
 /*
- * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2015, ARM Limited and Contributors. All rights reserved.
 *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * Neither the name of ARM nor the names of its contributors may be used
- * to endorse or promote products derived from this software without specific
- * prior written permission.
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ #include <aem_generic.h> #include <arch.h> @@ -49,6 +25,7 @@ func aem_generic_core_pwr_dwn * --------------------------------------------- */ b dcsw_op_louis +endfunc aem_generic_core_pwr_dwn func aem_generic_cluster_pwr_dwn @@ -67,6 +44,7 @@ func aem_generic_cluster_pwr_dwn */ mov x0, #DCCISW b dcsw_op_all +endfunc aem_generic_cluster_pwr_dwn /* --------------------------------------------- * This function provides cpu specific @@ -77,13 +55,22 @@ func aem_generic_cluster_pwr_dwn * reported. * --------------------------------------------- */ +.section .rodata.aem_generic_regs, "aS" +aem_generic_regs: /* The ascii list of register names to be reported */ + .asciz "" /* no registers to report */ + func aem_generic_cpu_reg_dump - mov x6, #0 /* no registers to report */ + adr x6, aem_generic_regs ret +endfunc aem_generic_cpu_reg_dump /* cpu_ops for Base AEM FVP */ -declare_cpu_ops aem_generic, BASE_AEM_MIDR, 1 +declare_cpu_ops aem_generic, BASE_AEM_MIDR, CPU_NO_RESET_FUNC, \ + aem_generic_core_pwr_dwn, \ + aem_generic_cluster_pwr_dwn /* cpu_ops for Foundation FVP */ -declare_cpu_ops aem_generic, FOUNDATION_AEM_MIDR, 1 +declare_cpu_ops aem_generic, FOUNDATION_AEM_MIDR, CPU_NO_RESET_FUNC, \ + aem_generic_core_pwr_dwn, \ + aem_generic_cluster_pwr_dwn diff --git a/lib/cpus/aarch64/cortex_a35.S b/lib/cpus/aarch64/cortex_a35.S new file mode 100644 index 00000000..b22189c8 --- /dev/null +++ b/lib/cpus/aarch64/cortex_a35.S @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch.h> +#include <asm_macros.S> +#include <bl_common.h> +#include <cortex_a35.h> +#include <cpu_macros.S> +#include <plat_macros.S> + + /* --------------------------------------------- + * Disable L1 data cache and unified L2 cache + * --------------------------------------------- + */ +func cortex_a35_disable_dcache + mrs x1, sctlr_el3 + bic x1, x1, #SCTLR_C_BIT + msr sctlr_el3, x1 + isb + ret +endfunc cortex_a35_disable_dcache + + /* --------------------------------------------- + * Disable intra-cluster coherency + * --------------------------------------------- + */ +func cortex_a35_disable_smp + mrs x0, CORTEX_A35_CPUECTLR_EL1 + bic x0, x0, #CORTEX_A35_CPUECTLR_SMPEN_BIT + msr CORTEX_A35_CPUECTLR_EL1, x0 + isb + dsb sy + ret +endfunc cortex_a35_disable_smp + + /* ------------------------------------------------- + * The CPU Ops reset function for Cortex-A35. + * Clobbers: x0 + * ------------------------------------------------- + */ +func cortex_a35_reset_func + /* --------------------------------------------- + * Enable the SMP bit. 
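+ * (i.e. a read-modify-write setting cpuectlr |= SMPEN; coherency is
+ * normally required to be enabled before this core turns on its caches)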
+ * --------------------------------------------- + */ + mrs x0, CORTEX_A35_CPUECTLR_EL1 + orr x0, x0, #CORTEX_A35_CPUECTLR_SMPEN_BIT + msr CORTEX_A35_CPUECTLR_EL1, x0 + isb + ret +endfunc cortex_a35_reset_func + +func cortex_a35_core_pwr_dwn + mov x18, x30 + + /* --------------------------------------------- + * Turn off caches. + * --------------------------------------------- + */ + bl cortex_a35_disable_dcache + + /* --------------------------------------------- + * Flush L1 caches. + * --------------------------------------------- + */ + mov x0, #DCCISW + bl dcsw_op_level1 + + /* --------------------------------------------- + * Come out of intra cluster coherency + * --------------------------------------------- + */ + mov x30, x18 + b cortex_a35_disable_smp +endfunc cortex_a35_core_pwr_dwn + +func cortex_a35_cluster_pwr_dwn + mov x18, x30 + + /* --------------------------------------------- + * Turn off caches. + * --------------------------------------------- + */ + bl cortex_a35_disable_dcache + + /* --------------------------------------------- + * Flush L1 caches. + * --------------------------------------------- + */ + mov x0, #DCCISW + bl dcsw_op_level1 + + /* --------------------------------------------- + * Disable the optional ACP. + * --------------------------------------------- + */ + bl plat_disable_acp + + /* --------------------------------------------- + * Flush L2 caches. + * --------------------------------------------- + */ + mov x0, #DCCISW + bl dcsw_op_level2 + + /* --------------------------------------------- + * Come out of intra cluster coherency + * --------------------------------------------- + */ + mov x30, x18 + b cortex_a35_disable_smp +endfunc cortex_a35_cluster_pwr_dwn + + /* --------------------------------------------- + * This function provides cortex_a35 specific + * register information for crash reporting. + * It needs to return with x6 pointing to + * a list of register names in ascii and + * x8 - x15 having values of registers to be + * reported. + * --------------------------------------------- + */ +.section .rodata.cortex_a35_regs, "aS" +cortex_a35_regs: /* The ascii list of register names to be reported */ + .asciz "cpuectlr_el1", "" + +func cortex_a35_cpu_reg_dump + adr x6, cortex_a35_regs + mrs x8, CORTEX_A35_CPUECTLR_EL1 + ret +endfunc cortex_a35_cpu_reg_dump + +declare_cpu_ops cortex_a35, CORTEX_A35_MIDR, \ + cortex_a35_reset_func, \ + cortex_a35_core_pwr_dwn, \ + cortex_a35_cluster_pwr_dwn diff --git a/lib/cpus/aarch64/cortex_a53.S b/lib/cpus/aarch64/cortex_a53.S index 188f3c1e..3e480bc1 100644 --- a/lib/cpus/aarch64/cortex_a53.S +++ b/lib/cpus/aarch64/cortex_a53.S @@ -1,39 +1,21 @@ /* - * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2014-2017, ARM Limited and Contributors. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ #include <arch.h> #include <asm_macros.S> #include <bl_common.h> #include <cortex_a53.h> #include <cpu_macros.S> +#include <debug.h> #include <plat_macros.S> +#if A53_DISABLE_NON_TEMPORAL_HINT +#undef ERRATA_A53_836870 +#define ERRATA_A53_836870 1 +#endif + /* --------------------------------------------- * Disable L1 data cache and unified L2 cache * --------------------------------------------- @@ -44,34 +26,174 @@ func cortex_a53_disable_dcache msr sctlr_el3, x1 isb ret +endfunc cortex_a53_disable_dcache /* --------------------------------------------- * Disable intra-cluster coherency * --------------------------------------------- */ func cortex_a53_disable_smp - mrs x0, CPUECTLR_EL1 - bic x0, x0, #CPUECTLR_SMP_BIT - msr CPUECTLR_EL1, x0 + mrs x0, CORTEX_A53_ECTLR_EL1 + bic x0, x0, #CORTEX_A53_ECTLR_SMP_BIT + msr CORTEX_A53_ECTLR_EL1, x0 isb dsb sy ret +endfunc cortex_a53_disable_smp + + /* -------------------------------------------------- + * Errata Workaround for Cortex A53 Errata #826319. + * This applies only to revision <= r0p2 of Cortex A53. + * Inputs: + * x0: variant[4:7] and revision[0:3] of current cpu. + * Shall clobber: x0-x17 + * -------------------------------------------------- + */ +func errata_a53_826319_wa + /* + * Compare x0 against revision r0p2 + */ + mov x17, x30 + bl check_errata_826319 + cbz x0, 1f + mrs x1, CORTEX_A53_L2ACTLR_EL1 + bic x1, x1, #CORTEX_A53_L2ACTLR_ENABLE_UNIQUECLEAN + orr x1, x1, #CORTEX_A53_L2ACTLR_DISABLE_CLEAN_PUSH + msr CORTEX_A53_L2ACTLR_EL1, x1 +1: + ret x17 +endfunc errata_a53_826319_wa + +func check_errata_826319 + mov x1, #0x02 + b cpu_rev_var_ls +endfunc check_errata_826319 + + /* --------------------------------------------------------------------- + * Disable the cache non-temporal hint. + * + * This ignores the Transient allocation hint in the MAIR and treats + * allocations the same as non-transient allocation types. As a result, + * the LDNP and STNP instructions in AArch64 behave the same as the + * equivalent LDP and STP instructions. + * + * This is relevant only for revisions <= r0p3 of Cortex-A53. + * From r0p4 and onwards, the bit to disable the hint is enabled by + * default at reset. + * + * Inputs: + * x0: variant[4:7] and revision[0:3] of current cpu. 
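+ * (In C terms, the guard-and-apply below is roughly, as a sketch:
+ *	if (cpu_rev_var_ls(rev_var, 0x03) == ERRATA_APPLIES)
+ *		cpuactlr |= CORTEX_A53_CPUACTLR_EL1_DTAH;
+ * with the read-modify-write done via mrs/orr/msr.)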
+ * Shall clobber: x0-x17
+ * ---------------------------------------------------------------------
+ */
+func a53_disable_non_temporal_hint
+ /*
+ * Compare x0 against revision r0p3
+ */
+ mov x17, x30
+ bl check_errata_disable_non_temporal_hint
+ cbz x0, 1f
+ mrs x1, CORTEX_A53_CPUACTLR_EL1
+ orr x1, x1, #CORTEX_A53_CPUACTLR_EL1_DTAH
+ msr CORTEX_A53_CPUACTLR_EL1, x1
+1:
+ ret x17
+endfunc a53_disable_non_temporal_hint
+
+func check_errata_disable_non_temporal_hint
+ mov x1, #0x03
+ b cpu_rev_var_ls
+endfunc check_errata_disable_non_temporal_hint
+
+ /* --------------------------------------------------
+ * Errata Workaround for Cortex A53 Errata #855873.
+ *
+ * This applies only to revisions >= r0p3 of Cortex A53.
+ * Earlier revisions of the core are affected as well, but don't
+ * have the chicken bit in the CPUACTLR register. It is expected that
+ * the rich OS takes care of that, especially as the workaround is
+ * shared with other errata in those revisions of the CPU.
+ * Inputs:
+ * x0: variant[4:7] and revision[0:3] of current cpu.
+ * Shall clobber: x0-x17
+ * --------------------------------------------------
+ */
+func errata_a53_855873_wa
+ /*
+ * Compare x0 against revision r0p3 and higher
+ */
+ mov x17, x30
+ bl check_errata_855873
+ cbz x0, 1f
+
+ mrs x1, CORTEX_A53_CPUACTLR_EL1
+ orr x1, x1, #CORTEX_A53_CPUACTLR_EL1_ENDCCASCI
+ msr CORTEX_A53_CPUACTLR_EL1, x1
+1:
+ ret x17
+endfunc errata_a53_855873_wa
+
+func check_errata_855873
+ mov x1, #0x03
+ b cpu_rev_var_hs
+endfunc check_errata_855873
+/*
+ * Errata workaround for Cortex A53 Errata #835769.
+ * This applies to revisions <= r0p4 of Cortex A53.
+ * This workaround is statically enabled at build time.
+ */
+func check_errata_835769
+ mov x1, #0x04
+ b cpu_rev_var_ls
+endfunc check_errata_835769
+
+/*
+ * Errata workaround for Cortex A53 Errata #843419.
+ * This applies to revisions <= r0p4 of Cortex A53.
+ * This workaround is statically enabled at build time.
+ */
+func check_errata_843419
+ mov x1, #0x04
+ b cpu_rev_var_ls
+endfunc check_errata_843419
+
+ /* -------------------------------------------------
+ * The CPU Ops reset function for Cortex-A53.
+ * Shall clobber: x0-x19
+ * -------------------------------------------------
+ */
 func cortex_a53_reset_func
+ mov x19, x30
+ bl cpu_get_rev_var
+ mov x18, x0
+
+
+#if ERRATA_A53_826319
+ mov x0, x18
+ bl errata_a53_826319_wa
+#endif
+
+#if ERRATA_A53_836870
+ mov x0, x18
+ bl a53_disable_non_temporal_hint
+#endif
+
+#if ERRATA_A53_855873
+ mov x0, x18
+ bl errata_a53_855873_wa
+#endif
+
 /* ---------------------------------------------
- * As a bare minimum enable the SMP bit if it is
- * not already set.
- * Clobbers : x0
+ * Enable the SMP bit.
 * ---------------------------------------------
 */
- mrs x0, CPUECTLR_EL1
- tst x0, #CPUECTLR_SMP_BIT
- b.ne skip_smp_setup
- orr x0, x0, #CPUECTLR_SMP_BIT
- msr CPUECTLR_EL1, x0
+ mrs x0, CORTEX_A53_ECTLR_EL1
+ orr x0, x0, #CORTEX_A53_ECTLR_SMP_BIT
+ msr CORTEX_A53_ECTLR_EL1, x0
 isb
-skip_smp_setup:
- ret
+ ret x19
+endfunc cortex_a53_reset_func
 
 func cortex_a53_core_pwr_dwn
 mov x18, x30
@@ -95,6 +217,7 @@ func cortex_a53_core_pwr_dwn
 */
 mov x30, x18
 b cortex_a53_disable_smp
+endfunc cortex_a53_core_pwr_dwn
 
 func cortex_a53_cluster_pwr_dwn
 mov x18, x30
@@ -131,6 +254,32 @@ func cortex_a53_cluster_pwr_dwn
 */
 mov x30, x18
 b cortex_a53_disable_smp
+endfunc cortex_a53_cluster_pwr_dwn
+
+#if REPORT_ERRATA
+/*
+ * Errata printing function for Cortex A53. Must follow AAPCS.
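+ * Each report_errata invocation below expands, via the macros in
+ * cpu_macros.S, to a call of the matching check_errata_* function and
+ * prints whether the workaround is applied or missing on this revision.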
+ */ +func cortex_a53_errata_report + stp x8, x30, [sp, #-16]! + + bl cpu_get_rev_var + mov x8, x0 + + /* + * Report all errata. The revision-variant information is passed to + * checking functions of each errata. + */ + report_errata ERRATA_A53_826319, cortex_a53, 826319 + report_errata ERRATA_A53_835769, cortex_a53, 835769 + report_errata ERRATA_A53_836870, cortex_a53, disable_non_temporal_hint + report_errata ERRATA_A53_843419, cortex_a53, 843419 + report_errata ERRATA_A53_855873, cortex_a53, 855873 + + ldp x8, x30, [sp], #16 + ret +endfunc cortex_a53_errata_report +#endif /* --------------------------------------------- * This function provides cortex_a53 specific @@ -143,11 +292,19 @@ func cortex_a53_cluster_pwr_dwn */ .section .rodata.cortex_a53_regs, "aS" cortex_a53_regs: /* The ascii list of register names to be reported */ - .asciz "cpuectlr_el1", "" + .asciz "cpuectlr_el1", "cpumerrsr_el1", "l2merrsr_el1", \ + "cpuactlr_el1", "" func cortex_a53_cpu_reg_dump adr x6, cortex_a53_regs - mrs x8, CPUECTLR_EL1 + mrs x8, CORTEX_A53_ECTLR_EL1 + mrs x9, CORTEX_A53_MERRSR_EL1 + mrs x10, CORTEX_A53_L2MERRSR_EL1 + mrs x11, CORTEX_A53_CPUACTLR_EL1 ret +endfunc cortex_a53_cpu_reg_dump -declare_cpu_ops cortex_a53, CORTEX_A53_MIDR +declare_cpu_ops cortex_a53, CORTEX_A53_MIDR, \ + cortex_a53_reset_func, \ + cortex_a53_core_pwr_dwn, \ + cortex_a53_cluster_pwr_dwn diff --git a/lib/cpus/aarch64/cortex_a55.S b/lib/cpus/aarch64/cortex_a55.S new file mode 100644 index 00000000..741c7734 --- /dev/null +++ b/lib/cpus/aarch64/cortex_a55.S @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch.h> +#include <asm_macros.S> +#include <bl_common.h> +#include <cortex_a55.h> +#include <cpu_macros.S> +#include <plat_macros.S> + + /* --------------------------------------------- + * HW will do the cache maintenance while powering down + * --------------------------------------------- + */ +func cortex_a55_core_pwr_dwn + /* --------------------------------------------- + * Enable CPU power down bit in power control register + * --------------------------------------------- + */ + mrs x0, CORTEX_A55_CPUPWRCTLR_EL1 + orr x0, x0, #CORTEX_A55_CORE_PWRDN_EN_MASK + msr CORTEX_A55_CPUPWRCTLR_EL1, x0 + isb + ret +endfunc cortex_a55_core_pwr_dwn + + /* --------------------------------------------- + * This function provides cortex_a55 specific + * register information for crash reporting. + * It needs to return with x6 pointing to + * a list of register names in ascii and + * x8 - x15 having values of registers to be + * reported. + * --------------------------------------------- + */ +.section .rodata.cortex_a55_regs, "aS" +cortex_a55_regs: /* The ascii list of register names to be reported */ + .asciz "cpuectlr_el1", "" + +func cortex_a55_cpu_reg_dump + adr x6, cortex_a55_regs + mrs x8, CORTEX_A55_CPUECTLR_EL1 + ret +endfunc cortex_a55_cpu_reg_dump + +declare_cpu_ops cortex_a55, CORTEX_A55_MIDR, \ + CPU_NO_RESET_FUNC, \ + cortex_a55_core_pwr_dwn diff --git a/lib/cpus/aarch64/cortex_a57.S b/lib/cpus/aarch64/cortex_a57.S index eb6c736f..a720e984 100644 --- a/lib/cpus/aarch64/cortex_a57.S +++ b/lib/cpus/aarch64/cortex_a57.S @@ -1,31 +1,7 @@ /* - * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2014-2017, ARM Limited and Contributors. All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ #include <arch.h> #include <asm_macros.S> @@ -33,6 +9,7 @@ #include <bl_common.h> #include <cortex_a57.h> #include <cpu_macros.S> +#include <debug.h> #include <plat_macros.S> /* --------------------------------------------- @@ -45,31 +22,34 @@ func cortex_a57_disable_dcache msr sctlr_el3, x1 isb ret +endfunc cortex_a57_disable_dcache /* --------------------------------------------- * Disable all types of L2 prefetches. * --------------------------------------------- */ func cortex_a57_disable_l2_prefetch - mrs x0, CPUECTLR_EL1 - orr x0, x0, #CPUECTLR_DIS_TWD_ACC_PFTCH_BIT - mov x1, #CPUECTLR_L2_IPFTCH_DIST_MASK - orr x1, x1, #CPUECTLR_L2_DPFTCH_DIST_MASK + mrs x0, CORTEX_A57_ECTLR_EL1 + orr x0, x0, #CORTEX_A57_ECTLR_DIS_TWD_ACC_PFTCH_BIT + mov x1, #CORTEX_A57_ECTLR_L2_IPFTCH_DIST_MASK + orr x1, x1, #CORTEX_A57_ECTLR_L2_DPFTCH_DIST_MASK bic x0, x0, x1 - msr CPUECTLR_EL1, x0 + msr CORTEX_A57_ECTLR_EL1, x0 isb dsb ish ret +endfunc cortex_a57_disable_l2_prefetch /* --------------------------------------------- * Disable intra-cluster coherency * --------------------------------------------- */ func cortex_a57_disable_smp - mrs x0, CPUECTLR_EL1 - bic x0, x0, #CPUECTLR_SMP_BIT - msr CPUECTLR_EL1, x0 + mrs x0, CORTEX_A57_ECTLR_EL1 + bic x0, x0, #CORTEX_A57_ECTLR_SMP_BIT + msr CORTEX_A57_ECTLR_EL1, x0 ret +endfunc cortex_a57_disable_smp /* --------------------------------------------- * Disable debug interfaces @@ -81,113 +61,338 @@ func cortex_a57_disable_ext_debug isb dsb sy ret +endfunc cortex_a57_disable_ext_debug /* -------------------------------------------------- * Errata Workaround for Cortex A57 Errata #806969. * This applies only to revision r0p0 of Cortex A57. * Inputs: * x0: variant[4:7] and revision[0:3] of current cpu. 
- * Clobbers : x0 - x5 + * Shall clobber: x0-x17 * -------------------------------------------------- */ func errata_a57_806969_wa /* * Compare x0 against revision r0p0 */ - cbz x0, apply_806969 -#if DEBUG - b print_revision_warning -#else - ret -#endif -apply_806969: + mov x17, x30 + bl check_errata_806969 + cbz x0, 1f + mrs x1, CORTEX_A57_CPUACTLR_EL1 + orr x1, x1, #CORTEX_A57_CPUACTLR_EL1_NO_ALLOC_WBWA + msr CORTEX_A57_CPUACTLR_EL1, x1 +1: + ret x17 +endfunc errata_a57_806969_wa + +func check_errata_806969 + mov x1, #0x00 + b cpu_rev_var_ls +endfunc check_errata_806969 + + /* --------------------------------------------------- + * Errata Workaround for Cortex A57 Errata #813419. + * This applies only to revision r0p0 of Cortex A57. + * --------------------------------------------------- + */ +func check_errata_813419 /* - * Test if errata has already been applied in an earlier - * invocation of the reset handler and does not need to - * be applied again. - */ - mrs x1, CPUACTLR_EL1 - tst x1, #CPUACTLR_NO_ALLOC_WBWA - b.ne skip_806969 - orr x1, x1, #CPUACTLR_NO_ALLOC_WBWA - msr CPUACTLR_EL1, x1 -skip_806969: + * Even though this is only needed for revision r0p0, it + * is always applied due to limitations of the current + * errata framework. + */ + mov x0, #ERRATA_APPLIES ret - +endfunc check_errata_813419 /* --------------------------------------------------- * Errata Workaround for Cortex A57 Errata #813420. * This applies only to revision r0p0 of Cortex A57. * Inputs: * x0: variant[4:7] and revision[0:3] of current cpu. - * Clobbers : x0 - x5 + * Shall clobber: x0-x17 * --------------------------------------------------- */ func errata_a57_813420_wa /* * Compare x0 against revision r0p0 */ - cbz x0, apply_813420 -#if DEBUG - b print_revision_warning -#else - ret -#endif -apply_813420: + mov x17, x30 + bl check_errata_813420 + cbz x0, 1f + mrs x1, CORTEX_A57_CPUACTLR_EL1 + orr x1, x1, #CORTEX_A57_CPUACTLR_EL1_DCC_AS_DCCI + msr CORTEX_A57_CPUACTLR_EL1, x1 +1: + ret x17 +endfunc errata_a57_813420_wa + +func check_errata_813420 + mov x1, #0x00 + b cpu_rev_var_ls +endfunc check_errata_813420 + + /* -------------------------------------------------------------------- + * Disable the over-read from the LDNP instruction. + * + * This applies to all revisions <= r1p2. The performance degradation + * observed with LDNP/STNP has been fixed on r1p3 and onwards. + * + * Inputs: + * x0: variant[4:7] and revision[0:3] of current cpu. + * Shall clobber: x0-x17 + * --------------------------------------------------------------------- + */ +func a57_disable_ldnp_overread /* - * Test if errata has already been applied in an earlier - * invocation of the reset handler and does not need to - * be applied again. - */ - mrs x1, CPUACTLR_EL1 - tst x1, #CPUACTLR_DCC_AS_DCCI - b.ne skip_813420 - orr x1, x1, #CPUACTLR_DCC_AS_DCCI - msr CPUACTLR_EL1, x1 -skip_813420: - ret + * Compare x0 against revision r1p2 + */ + mov x17, x30 + bl check_errata_disable_ldnp_overread + cbz x0, 1f + mrs x1, CORTEX_A57_CPUACTLR_EL1 + orr x1, x1, #CORTEX_A57_CPUACTLR_EL1_DIS_OVERREAD + msr CORTEX_A57_CPUACTLR_EL1, x1 +1: + ret x17 +endfunc a57_disable_ldnp_overread + +func check_errata_disable_ldnp_overread + mov x1, #0x12 + b cpu_rev_var_ls +endfunc check_errata_disable_ldnp_overread + + /* --------------------------------------------------- + * Errata Workaround for Cortex A57 Errata #826974. + * This applies only to revision <= r1p1 of Cortex A57. + * Inputs: + * x0: variant[4:7] and revision[0:3] of current cpu. 
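+ * (As in the other workarounds, x30 is stashed in x17 across the
+ * nested bl to the check function and restored with ret x17.)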
+ * Shall clobber: x0-x17 + * --------------------------------------------------- + */ +func errata_a57_826974_wa + /* + * Compare x0 against revision r1p1 + */ + mov x17, x30 + bl check_errata_826974 + cbz x0, 1f + mrs x1, CORTEX_A57_CPUACTLR_EL1 + orr x1, x1, #CORTEX_A57_CPUACTLR_EL1_DIS_LOAD_PASS_DMB + msr CORTEX_A57_CPUACTLR_EL1, x1 +1: + ret x17 +endfunc errata_a57_826974_wa + +func check_errata_826974 + mov x1, #0x11 + b cpu_rev_var_ls +endfunc check_errata_826974 + + /* --------------------------------------------------- + * Errata Workaround for Cortex A57 Errata #826977. + * This applies only to revision <= r1p1 of Cortex A57. + * Inputs: + * x0: variant[4:7] and revision[0:3] of current cpu. + * Shall clobber: x0-x17 + * --------------------------------------------------- + */ +func errata_a57_826977_wa + /* + * Compare x0 against revision r1p1 + */ + mov x17, x30 + bl check_errata_826977 + cbz x0, 1f + mrs x1, CORTEX_A57_CPUACTLR_EL1 + orr x1, x1, #CORTEX_A57_CPUACTLR_EL1_GRE_NGRE_AS_NGNRE + msr CORTEX_A57_CPUACTLR_EL1, x1 +1: + ret x17 +endfunc errata_a57_826977_wa + +func check_errata_826977 + mov x1, #0x11 + b cpu_rev_var_ls +endfunc check_errata_826977 + + /* --------------------------------------------------- + * Errata Workaround for Cortex A57 Errata #828024. + * This applies only to revision <= r1p1 of Cortex A57. + * Inputs: + * x0: variant[4:7] and revision[0:3] of current cpu. + * Shall clobber: x0-x17 + * --------------------------------------------------- + */ +func errata_a57_828024_wa + /* + * Compare x0 against revision r1p1 + */ + mov x17, x30 + bl check_errata_828024 + cbz x0, 1f + mrs x1, CORTEX_A57_CPUACTLR_EL1 + /* + * Setting the relevant bits in CPUACTLR_EL1 has to be done in 2 + * instructions here because the resulting bitmask doesn't fit in a + * 16-bit value so it cannot be encoded in a single instruction. + */ + orr x1, x1, #CORTEX_A57_CPUACTLR_EL1_NO_ALLOC_WBWA + orr x1, x1, #(CORTEX_A57_CPUACTLR_EL1_DIS_L1_STREAMING | \ + CORTEX_A57_CPUACTLR_EL1_DIS_STREAMING) + msr CORTEX_A57_CPUACTLR_EL1, x1 +1: + ret x17 +endfunc errata_a57_828024_wa + +func check_errata_828024 + mov x1, #0x11 + b cpu_rev_var_ls +endfunc check_errata_828024 + + /* --------------------------------------------------- + * Errata Workaround for Cortex A57 Errata #829520. + * This applies only to revision <= r1p2 of Cortex A57. + * Inputs: + * x0: variant[4:7] and revision[0:3] of current cpu. + * Shall clobber: x0-x17 + * --------------------------------------------------- + */ +func errata_a57_829520_wa + /* + * Compare x0 against revision r1p2 + */ + mov x17, x30 + bl check_errata_829520 + cbz x0, 1f + mrs x1, CORTEX_A57_CPUACTLR_EL1 + orr x1, x1, #CORTEX_A57_CPUACTLR_EL1_DIS_INDIRECT_PREDICTOR + msr CORTEX_A57_CPUACTLR_EL1, x1 +1: + ret x17 +endfunc errata_a57_829520_wa + +func check_errata_829520 + mov x1, #0x12 + b cpu_rev_var_ls +endfunc check_errata_829520 + + /* --------------------------------------------------- + * Errata Workaround for Cortex A57 Errata #833471. + * This applies only to revision <= r1p2 of Cortex A57. + * Inputs: + * x0: variant[4:7] and revision[0:3] of current cpu. 
+ * Shall clobber: x0-x17
+ * ---------------------------------------------------
+ */
+func errata_a57_833471_wa
+ /*
+ * Compare x0 against revision r1p2
+ */
+ mov x17, x30
+ bl check_errata_833471
+ cbz x0, 1f
+ mrs x1, CORTEX_A57_CPUACTLR_EL1
+ orr x1, x1, #CORTEX_A57_CPUACTLR_EL1_FORCE_FPSCR_FLUSH
+ msr CORTEX_A57_CPUACTLR_EL1, x1
+1:
+ ret x17
+endfunc errata_a57_833471_wa
+
+func check_errata_833471
+ mov x1, #0x12
+ b cpu_rev_var_ls
+endfunc check_errata_833471
+
+ /* --------------------------------------------------
+ * Errata Workaround for Cortex A57 Errata #859972.
+ * This applies only to revision <= r1p3 of Cortex A57.
+ * Inputs:
+ * x0: variant[4:7] and revision[0:3] of current cpu.
+ * Shall clobber: x0-x17
+ * --------------------------------------------------
+ */
+func errata_a57_859972_wa
+ mov x17, x30
+ bl check_errata_859972
+ cbz x0, 1f
+ mrs x1, CORTEX_A57_CPUACTLR_EL1
+ orr x1, x1, #CORTEX_A57_CPUACTLR_EL1_DIS_INSTR_PREFETCH
+ msr CORTEX_A57_CPUACTLR_EL1, x1
+1:
+ ret x17
+endfunc errata_a57_859972_wa
+
+func check_errata_859972
+ mov x1, #0x13
+ b cpu_rev_var_ls
+endfunc check_errata_859972
 
 /* -------------------------------------------------
 * The CPU Ops reset function for Cortex-A57.
- * Clobbers: x0-x5, x15, x19, x30
+ * Shall clobber: x0-x19
 * -------------------------------------------------
 */
 func cortex_a57_reset_func
 mov x19, x30
- mrs x0, midr_el1
-
- /*
- * Extract the variant[20:23] and revision[0:3] from x0
- * and pack it in x15[0:7] as variant[4:7] and revision[0:3].
- * First extract x0[16:23] to x15[0:7] and zero fill the rest.
- * Then extract x0[0:3] into x15[0:3] retaining other bits.
- */
- ubfx x15, x0, #(MIDR_VAR_SHIFT - MIDR_REV_BITS), #(MIDR_REV_BITS + MIDR_VAR_BITS)
- bfxil x15, x0, #MIDR_REV_SHIFT, #MIDR_REV_BITS
+ bl cpu_get_rev_var
+ mov x18, x0
 
 #if ERRATA_A57_806969
- mov x0, x15
+ mov x0, x18
 bl errata_a57_806969_wa
 #endif
 
 #if ERRATA_A57_813420
- mov x0, x15
+ mov x0, x18
 bl errata_a57_813420_wa
 #endif
 
+#if A57_DISABLE_NON_TEMPORAL_HINT
+ mov x0, x18
+ bl a57_disable_ldnp_overread
+#endif
+
+#if ERRATA_A57_826974
+ mov x0, x18
+ bl errata_a57_826974_wa
+#endif
+
+#if ERRATA_A57_826977
+ mov x0, x18
+ bl errata_a57_826977_wa
+#endif
+
+#if ERRATA_A57_828024
+ mov x0, x18
+ bl errata_a57_828024_wa
+#endif
+
+#if ERRATA_A57_829520
+ mov x0, x18
+ bl errata_a57_829520_wa
+#endif
+
+#if ERRATA_A57_833471
+ mov x0, x18
+ bl errata_a57_833471_wa
+#endif
+
+#if ERRATA_A57_859972
+ mov x0, x18
+ bl errata_a57_859972_wa
+#endif
+
 /* ---------------------------------------------
- * As a bare minimum enable the SMP bit if it is
- * not already set.
+ * Enable the SMP bit.
 * ---------------------------------------------
 */
- mrs x0, CPUECTLR_EL1
- tst x0, #CPUECTLR_SMP_BIT
- b.ne skip_smp_setup
- orr x0, x0, #CPUECTLR_SMP_BIT
- msr CPUECTLR_EL1, x0
-skip_smp_setup:
+ mrs x0, CORTEX_A57_ECTLR_EL1
+ orr x0, x0, #CORTEX_A57_ECTLR_SMP_BIT
+ msr CORTEX_A57_ECTLR_EL1, x0
 isb
 ret x19
+endfunc cortex_a57_reset_func
 
 /* ----------------------------------------------------
 * The CPU Ops core power down function for Cortex-A57.
@@ -227,6 +432,7 @@ func cortex_a57_core_pwr_dwn
 */
 mov x30, x18
 b cortex_a57_disable_ext_debug
+endfunc cortex_a57_core_pwr_dwn
 
 /* -------------------------------------------------------
 * The CPU Ops cluster power down function for Cortex-A57.
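 * (Editorial note: both power-down paths keep a strict order: disable
 * the data cache, flush by set/way, exit coherency, then quiesce the
 * debug interfaces. In C-like pseudocode for the core path, with the
 * prefetch-disable steps elided; a sketch only:
 *	cortex_a57_disable_dcache();
 *	dcsw_op_level1(DCCISW);
 *	cortex_a57_disable_smp();
 *	cortex_a57_disable_ext_debug();
 * Flushing with the cache still enabled could re-allocate lines behind
 * the clean+invalidate.)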
@@ -280,6 +486,39 @@ func cortex_a57_cluster_pwr_dwn */ mov x30, x18 b cortex_a57_disable_ext_debug +endfunc cortex_a57_cluster_pwr_dwn + +#if REPORT_ERRATA +/* + * Errata printing function for Cortex A57. Must follow AAPCS. + */ +func cortex_a57_errata_report + stp x8, x30, [sp, #-16]! + + bl cpu_get_rev_var + mov x8, x0 + + /* + * Report all errata. The revision-variant information is passed to + * checking functions of each errata. + */ + report_errata ERRATA_A57_806969, cortex_a57, 806969 + report_errata ERRATA_A57_813419, cortex_a57, 813419 + report_errata ERRATA_A57_813420, cortex_a57, 813420 + report_errata A57_DISABLE_NON_TEMPORAL_HINT, cortex_a57, \ + disable_ldnp_overread + report_errata ERRATA_A57_826974, cortex_a57, 826974 + report_errata ERRATA_A57_826977, cortex_a57, 826977 + report_errata ERRATA_A57_828024, cortex_a57, 828024 + report_errata ERRATA_A57_829520, cortex_a57, 829520 + report_errata ERRATA_A57_833471, cortex_a57, 833471 + report_errata ERRATA_A57_859972, cortex_a57, 859972 + + + ldp x8, x30, [sp], #16 + ret +endfunc cortex_a57_errata_report +#endif /* --------------------------------------------- * This function provides cortex_a57 specific @@ -292,12 +531,18 @@ func cortex_a57_cluster_pwr_dwn */ .section .rodata.cortex_a57_regs, "aS" cortex_a57_regs: /* The ascii list of register names to be reported */ - .asciz "cpuectlr_el1", "" + .asciz "cpuectlr_el1", "cpumerrsr_el1", "l2merrsr_el1", "" func cortex_a57_cpu_reg_dump adr x6, cortex_a57_regs - mrs x8, CPUECTLR_EL1 + mrs x8, CORTEX_A57_ECTLR_EL1 + mrs x9, CORTEX_A57_MERRSR_EL1 + mrs x10, CORTEX_A57_L2MERRSR_EL1 ret +endfunc cortex_a57_cpu_reg_dump -declare_cpu_ops cortex_a57, CORTEX_A57_MIDR +declare_cpu_ops cortex_a57, CORTEX_A57_MIDR, \ + cortex_a57_reset_func, \ + cortex_a57_core_pwr_dwn, \ + cortex_a57_cluster_pwr_dwn diff --git a/lib/cpus/aarch64/cortex_a72.S b/lib/cpus/aarch64/cortex_a72.S new file mode 100644 index 00000000..b0341256 --- /dev/null +++ b/lib/cpus/aarch64/cortex_a72.S @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +#include <arch.h> +#include <asm_macros.S> +#include <assert_macros.S> +#include <cortex_a72.h> +#include <cpu_macros.S> +#include <plat_macros.S> + + /* --------------------------------------------- + * Disable L1 data cache and unified L2 cache + * --------------------------------------------- + */ +func cortex_a72_disable_dcache + mrs x1, sctlr_el3 + bic x1, x1, #SCTLR_C_BIT + msr sctlr_el3, x1 + isb + ret +endfunc cortex_a72_disable_dcache + + /* --------------------------------------------- + * Disable all types of L2 prefetches. + * --------------------------------------------- + */ +func cortex_a72_disable_l2_prefetch + mrs x0, CORTEX_A72_ECTLR_EL1 + orr x0, x0, #CORTEX_A72_ECTLR_DIS_TWD_ACC_PFTCH_BIT + mov x1, #CORTEX_A72_ECTLR_L2_IPFTCH_DIST_MASK + orr x1, x1, #CORTEX_A72_ECTLR_L2_DPFTCH_DIST_MASK + bic x0, x0, x1 + msr CORTEX_A72_ECTLR_EL1, x0 + isb + ret +endfunc cortex_a72_disable_l2_prefetch + + /* --------------------------------------------- + * Disable the load-store hardware prefetcher. 
+ * ---------------------------------------------
+ */
+func cortex_a72_disable_hw_prefetcher
+ mrs x0, CORTEX_A72_CPUACTLR_EL1
+ orr x0, x0, #CORTEX_A72_CPUACTLR_EL1_DISABLE_L1_DCACHE_HW_PFTCH
+ msr CORTEX_A72_CPUACTLR_EL1, x0
+ isb
+ dsb ish
+ ret
+endfunc cortex_a72_disable_hw_prefetcher
+
+ /* ---------------------------------------------
+ * Disable intra-cluster coherency
+ * ---------------------------------------------
+ */
+func cortex_a72_disable_smp
+ mrs x0, CORTEX_A72_ECTLR_EL1
+ bic x0, x0, #CORTEX_A72_ECTLR_SMP_BIT
+ msr CORTEX_A72_ECTLR_EL1, x0
+ ret
+endfunc cortex_a72_disable_smp
+
+ /* ---------------------------------------------
+ * Disable debug interfaces
+ * ---------------------------------------------
+ */
+func cortex_a72_disable_ext_debug
+ mov x0, #1
+ msr osdlr_el1, x0
+ isb
+ dsb sy
+ ret
+endfunc cortex_a72_disable_ext_debug
+
+ /* --------------------------------------------------
+ * Errata Workaround for Cortex A72 Errata #859971.
+ * This applies only to revision <= r0p3 of Cortex A72.
+ * Inputs:
+ * x0: variant[4:7] and revision[0:3] of current cpu.
+ * Shall clobber: x0-x17
+ * --------------------------------------------------
+ */
+func errata_a72_859971_wa
+ mov x17, x30
+ bl check_errata_859971
+ cbz x0, 1f
+ mrs x1, CORTEX_A72_CPUACTLR_EL1
+ orr x1, x1, #CORTEX_A72_CPUACTLR_EL1_DIS_INSTR_PREFETCH
+ msr CORTEX_A72_CPUACTLR_EL1, x1
+1:
+ ret x17
+endfunc errata_a72_859971_wa
+
+func check_errata_859971
+ mov x1, #0x03
+ b cpu_rev_var_ls
+endfunc check_errata_859971
+
+ /* -------------------------------------------------
+ * The CPU Ops reset function for Cortex-A72.
+ * -------------------------------------------------
+ */
+func cortex_a72_reset_func
+ mov x19, x30
+ bl cpu_get_rev_var
+ mov x18, x0
+
+#if ERRATA_A72_859971
+ mov x0, x18
+ bl errata_a72_859971_wa
+#endif
+ /* ---------------------------------------------
+ * Enable the SMP bit.
+ * ---------------------------------------------
+ */
+ mrs x0, CORTEX_A72_ECTLR_EL1
+ orr x0, x0, #CORTEX_A72_ECTLR_SMP_BIT
+ msr CORTEX_A72_ECTLR_EL1, x0
+ isb
+ ret x19
+endfunc cortex_a72_reset_func
+
+ /* ----------------------------------------------------
+ * The CPU Ops core power down function for Cortex-A72.
+ * ----------------------------------------------------
+ */
+func cortex_a72_core_pwr_dwn
+ mov x18, x30
+
+ /* ---------------------------------------------
+ * Turn off caches.
+ * ---------------------------------------------
+ */
+ bl cortex_a72_disable_dcache
+
+ /* ---------------------------------------------
+ * Disable the L2 prefetches.
+ * ---------------------------------------------
+ */
+ bl cortex_a72_disable_l2_prefetch
+
+ /* ---------------------------------------------
+ * Disable the load-store hardware prefetcher.
+ * ---------------------------------------------
+ */
+ bl cortex_a72_disable_hw_prefetcher
+
+ /* ---------------------------------------------
+ * Flush L1 caches.
+ * --------------------------------------------- + */ + mov x0, #DCCISW + bl dcsw_op_level1 + + /* --------------------------------------------- + * Come out of intra cluster coherency + * --------------------------------------------- + */ + bl cortex_a72_disable_smp + + /* --------------------------------------------- + * Force the debug interfaces to be quiescent + * --------------------------------------------- + */ + mov x30, x18 + b cortex_a72_disable_ext_debug +endfunc cortex_a72_core_pwr_dwn + + /* ------------------------------------------------------- + * The CPU Ops cluster power down function for Cortex-A72. + * ------------------------------------------------------- + */ +func cortex_a72_cluster_pwr_dwn + mov x18, x30 + + /* --------------------------------------------- + * Turn off caches. + * --------------------------------------------- + */ + bl cortex_a72_disable_dcache + + /* --------------------------------------------- + * Disable the L2 prefetches. + * --------------------------------------------- + */ + bl cortex_a72_disable_l2_prefetch + + /* --------------------------------------------- + * Disable the load-store hardware prefetcher. + * --------------------------------------------- + */ + bl cortex_a72_disable_hw_prefetcher + +#if !SKIP_A72_L1_FLUSH_PWR_DWN + /* --------------------------------------------- + * Flush L1 caches. + * --------------------------------------------- + */ + mov x0, #DCCISW + bl dcsw_op_level1 +#endif + + /* --------------------------------------------- + * Disable the optional ACP. + * --------------------------------------------- + */ + bl plat_disable_acp + + /* ------------------------------------------------- + * Flush the L2 caches. + * ------------------------------------------------- + */ + mov x0, #DCCISW + bl dcsw_op_level2 + + /* --------------------------------------------- + * Come out of intra cluster coherency + * --------------------------------------------- + */ + bl cortex_a72_disable_smp + + /* --------------------------------------------- + * Force the debug interfaces to be quiescent + * --------------------------------------------- + */ + mov x30, x18 + b cortex_a72_disable_ext_debug +endfunc cortex_a72_cluster_pwr_dwn + +#if REPORT_ERRATA +/* + * Errata printing function for Cortex A72. Must follow AAPCS. + */ +func cortex_a72_errata_report + stp x8, x30, [sp, #-16]! + + bl cpu_get_rev_var + mov x8, x0 + + /* + * Report all errata. The revision-variant information is passed to + * checking functions of each errata. + */ + report_errata ERRATA_A72_859971, cortex_a72, 859971 + + ldp x8, x30, [sp], #16 + ret +endfunc cortex_a72_errata_report +#endif + + /* --------------------------------------------- + * This function provides cortex_a72 specific + * register information for crash reporting. + * It needs to return with x6 pointing to + * a list of register names in ascii and + * x8 - x15 having values of registers to be + * reported. 
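+ * (The crash reporter pairs each NUL-separated name in the rodata list
+ * with x8, x9, ... in order, so the number of names must match the
+ * number of registers populated; three here.)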
+ * --------------------------------------------- + */ +.section .rodata.cortex_a72_regs, "aS" +cortex_a72_regs: /* The ascii list of register names to be reported */ + .asciz "cpuectlr_el1", "cpumerrsr_el1", "l2merrsr_el1", "" + +func cortex_a72_cpu_reg_dump + adr x6, cortex_a72_regs + mrs x8, CORTEX_A72_ECTLR_EL1 + mrs x9, CORTEX_A72_MERRSR_EL1 + mrs x10, CORTEX_A72_L2MERRSR_EL1 + ret +endfunc cortex_a72_cpu_reg_dump + + +declare_cpu_ops cortex_a72, CORTEX_A72_MIDR, \ + cortex_a72_reset_func, \ + cortex_a72_core_pwr_dwn, \ + cortex_a72_cluster_pwr_dwn diff --git a/lib/cpus/aarch64/cortex_a73.S b/lib/cpus/aarch64/cortex_a73.S new file mode 100644 index 00000000..f642816e --- /dev/null +++ b/lib/cpus/aarch64/cortex_a73.S @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +#include <arch.h> +#include <asm_macros.S> +#include <bl_common.h> +#include <cortex_a73.h> +#include <cpu_macros.S> +#include <plat_macros.S> + + /* --------------------------------------------- + * Disable L1 data cache + * --------------------------------------------- + */ +func cortex_a73_disable_dcache + mrs x1, sctlr_el3 + bic x1, x1, #SCTLR_C_BIT + msr sctlr_el3, x1 + isb + ret +endfunc cortex_a73_disable_dcache + + /* --------------------------------------------- + * Disable intra-cluster coherency + * --------------------------------------------- + */ +func cortex_a73_disable_smp + mrs x0, CORTEX_A73_CPUECTLR_EL1 + bic x0, x0, #CORTEX_A73_CPUECTLR_SMP_BIT + msr CORTEX_A73_CPUECTLR_EL1, x0 + isb + dsb sy + ret +endfunc cortex_a73_disable_smp + +func cortex_a73_reset_func + /* --------------------------------------------- + * Enable the SMP bit. + * Clobbers : x0 + * --------------------------------------------- + */ + mrs x0, CORTEX_A73_CPUECTLR_EL1 + orr x0, x0, #CORTEX_A73_CPUECTLR_SMP_BIT + msr CORTEX_A73_CPUECTLR_EL1, x0 + isb + ret +endfunc cortex_a73_reset_func + +func cortex_a73_core_pwr_dwn + mov x18, x30 + + /* --------------------------------------------- + * Turn off caches. + * --------------------------------------------- + */ + bl cortex_a73_disable_dcache + + /* --------------------------------------------- + * Flush L1 caches. + * --------------------------------------------- + */ + mov x0, #DCCISW + bl dcsw_op_level1 + + /* --------------------------------------------- + * Come out of intra cluster coherency + * --------------------------------------------- + */ + mov x30, x18 + b cortex_a73_disable_smp +endfunc cortex_a73_core_pwr_dwn + +func cortex_a73_cluster_pwr_dwn + mov x18, x30 + + /* --------------------------------------------- + * Turn off caches. + * --------------------------------------------- + */ + bl cortex_a73_disable_dcache + + /* --------------------------------------------- + * Flush L1 caches. + * --------------------------------------------- + */ + mov x0, #DCCISW + bl dcsw_op_level1 + + /* --------------------------------------------- + * Disable the optional ACP. + * --------------------------------------------- + */ + bl plat_disable_acp + + /* --------------------------------------------- + * Flush L2 caches. 
+ * --------------------------------------------- + */ + mov x0, #DCCISW + bl dcsw_op_level2 + + /* --------------------------------------------- + * Come out of intra cluster coherency + * --------------------------------------------- + */ + mov x30, x18 + b cortex_a73_disable_smp +endfunc cortex_a73_cluster_pwr_dwn + + /* --------------------------------------------- + * This function provides cortex_a73 specific + * register information for crash reporting. + * It needs to return with x6 pointing to + * a list of register names in ascii and + * x8 - x15 having values of registers to be + * reported. + * --------------------------------------------- + */ +.section .rodata.cortex_a73_regs, "aS" +cortex_a73_regs: /* The ascii list of register names to be reported */ + .asciz "cpuectlr_el1", "l2merrsr_el1", "" + +func cortex_a73_cpu_reg_dump + adr x6, cortex_a73_regs + mrs x8, CORTEX_A73_CPUECTLR_EL1 + mrs x9, CORTEX_A73_L2MERRSR_EL1 + ret +endfunc cortex_a73_cpu_reg_dump + +declare_cpu_ops cortex_a73, CORTEX_A73_MIDR, \ + cortex_a73_reset_func, \ + cortex_a73_core_pwr_dwn, \ + cortex_a73_cluster_pwr_dwn diff --git a/lib/cpus/aarch64/cortex_a75.S b/lib/cpus/aarch64/cortex_a75.S new file mode 100644 index 00000000..1f4500cb --- /dev/null +++ b/lib/cpus/aarch64/cortex_a75.S @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch.h> +#include <asm_macros.S> +#include <bl_common.h> +#include <cpu_macros.S> +#include <plat_macros.S> +#include <cortex_a75.h> + + /* --------------------------------------------- + * HW will do the cache maintenance while powering down + * --------------------------------------------- + */ +func cortex_a75_core_pwr_dwn + /* --------------------------------------------- + * Enable CPU power down bit in power control register + * --------------------------------------------- + */ + mrs x0, CORTEX_A75_CPUPWRCTLR_EL1 + orr x0, x0, #CORTEX_A75_CORE_PWRDN_EN_MASK + msr CORTEX_A75_CPUPWRCTLR_EL1, x0 + isb + ret +endfunc cortex_a75_core_pwr_dwn + + /* --------------------------------------------- + * This function provides cortex_a75 specific + * register information for crash reporting. + * It needs to return with x6 pointing to + * a list of register names in ascii and + * x8 - x15 having values of registers to be + * reported. + * --------------------------------------------- + */ +.section .rodata.cortex_a75_regs, "aS" +cortex_a75_regs: /* The ascii list of register names to be reported */ + .asciz "cpuectlr_el1", "" + +func cortex_a75_cpu_reg_dump + adr x6, cortex_a75_regs + mrs x8, CORTEX_A75_CPUECTLR_EL1 + ret +endfunc cortex_a75_cpu_reg_dump + +declare_cpu_ops cortex_a75, CORTEX_A75_MIDR, \ + CPU_NO_RESET_FUNC, \ + cortex_a75_core_pwr_dwn diff --git a/lib/cpus/aarch64/cpu_helpers.S b/lib/cpus/aarch64/cpu_helpers.S index bebe7c0a..23845534 100644 --- a/lib/cpus/aarch64/cpu_helpers.S +++ b/lib/cpus/aarch64/cpu_helpers.S @@ -1,43 +1,21 @@ /* - * Copyright (c) 2014, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2014-2017, ARM Limited and Contributors. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. 
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * Neither the name of ARM nor the names of its contributors may be used
- * to endorse or promote products derived from this software without specific
- * prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+ * SPDX-License-Identifier: BSD-3-Clause
 */
 #include <arch.h>
 #include <asm_macros.S>
 #include <assert_macros.S>
-#include <cpu_macros.S>
-#if IMAGE_BL31
+#ifdef IMAGE_BL31
 #include <cpu_data.h>
 #endif
+#include <cpu_macros.S>
+#include <debug.h>
+#include <errata_report.h>
 
 /* Reset fn is needed in BL at reset vector */
-#if IMAGE_BL1 || IMAGE_BL31
+#if defined(IMAGE_BL1) || defined(IMAGE_BL31)
 /*
 * The reset handler common to all platforms. After a matching
 * cpu_ops structure entry is found, the corresponding reset_handler
@@ -53,7 +31,7 @@ func reset_handler
 /* Get the matching cpu_ops pointer */
 bl get_cpu_ops_ptr
-#if ASM_ASSERTION
+#if ENABLE_ASSERTIONS
 cmp x0, #0
 ASM_ASSERT(ne)
 #endif
@@ -67,50 +45,48 @@ func reset_handler
 br x2
 1:
 ret
+endfunc reset_handler
 
 #endif /* IMAGE_BL1 || IMAGE_BL31 */
 
-#if IMAGE_BL31 /* The power down core and cluster is needed only in BL31 */
+#ifdef IMAGE_BL31 /* The power down core and cluster is needed only in BL31 */
 /*
- * The prepare core power down function for all platforms. After
- * the cpu_ops pointer is retrieved from cpu_data, the corresponding
- * pwr_dwn_core in the cpu_ops is invoked.
+ * void prepare_cpu_pwr_dwn(unsigned int power_level)
+ *
+ * Prepare CPU power down function for all platforms. The function takes
+ * a domain level to be powered down as its parameter. After the cpu_ops
+ * pointer is retrieved from cpu_data, the handler for requested power
+ * level is called.
 */
- .globl prepare_core_pwr_dwn
-func prepare_core_pwr_dwn
- mrs x1, tpidr_el3
- ldr x0, [x1, #CPU_DATA_CPU_OPS_PTR]
-#if ASM_ASSERTION
- cmp x0, #0
- ASM_ASSERT(ne)
-#endif
-
- /* Get the cpu_ops core_pwr_dwn handler */
- ldr x1, [x0, #CPU_PWR_DWN_CORE]
- br x1
-
+ .globl prepare_cpu_pwr_dwn
+func prepare_cpu_pwr_dwn
 /*
- * The prepare cluster power down function for all platforms. After
- * the cpu_ops pointer is retrieved from cpu_data, the corresponding
- * pwr_dwn_cluster in the cpu_ops is invoked.
+ * If the given power level exceeds CPU_MAX_PWR_DWN_OPS, we call the
+ * power down handler for the last power level
 */
- .globl prepare_cluster_pwr_dwn
-func prepare_cluster_pwr_dwn
+ mov_imm x2, (CPU_MAX_PWR_DWN_OPS - 1)
+ cmp x0, x2
+ csel x2, x2, x0, hi
+
 mrs x1, tpidr_el3
 ldr x0, [x1, #CPU_DATA_CPU_OPS_PTR]
-#if ASM_ASSERTION
+#if ENABLE_ASSERTIONS
 cmp x0, #0
 ASM_ASSERT(ne)
 #endif
 
- /* Get the cpu_ops cluster_pwr_dwn handler */
- ldr x1, [x0, #CPU_PWR_DWN_CLUSTER]
+ /* Get the appropriate power down handler */
+ mov x1, #CPU_PWR_DWN_OPS
+ add x1, x1, x2, lsl #3
+ ldr x1, [x0, x1]
 br x1
+endfunc prepare_cpu_pwr_dwn
 
 /*
 * Initializes the cpu_ops_ptr if not already initialized
- * in cpu_data. This can be called without a runtime stack.
+ * in cpu_data. This can be called without a runtime stack, but may
+ * only be called after the MMU is enabled.
 * clobbers: x0 - x6, x10
 */
 .globl init_cpu_ops
@@ -120,29 +96,18 @@ func init_cpu_ops
 cbnz x0, 1f
 mov x10, x30
 bl get_cpu_ops_ptr
-#if ASM_ASSERTION
+#if ENABLE_ASSERTIONS
 cmp x0, #0
 ASM_ASSERT(ne)
 #endif
 str x0, [x6, #CPU_DATA_CPU_OPS_PTR]!
-
- /*
- * Make sure that any pre-fetched cache copies are invalidated.
- * Ensure that we are running with cache disable else we
- * invalidate our own update.
- */
-#if ASM_ASSERTION
- mrs x1, sctlr_el3
- tst x1, #SCTLR_C_BIT
- ASM_ASSERT(eq)
-#endif
- dc ivac, x6
 mov x30, x10
 1:
 ret
+endfunc init_cpu_ops
 
 #endif /* IMAGE_BL31 */
 
-#if IMAGE_BL31 && CRASH_REPORTING
+#if defined(IMAGE_BL31) && CRASH_REPORTING
 /*
 * The cpu specific registers which need to be reported in a crash
 * are reported via cpu_ops cpu_reg_dump function. After a matching
@@ -164,6 +129,7 @@ func do_cpu_reg_dump
 1:
 mov x30, x16
 ret
+endfunc do_cpu_reg_dump
 #endif
 
 /*
@@ -208,29 +174,110 @@ func get_cpu_ops_ptr
 sub x0, x4, #(CPU_OPS_SIZE + CPU_MIDR)
 error_exit:
 ret
+endfunc get_cpu_ops_ptr
+
+/*
+ * Extract CPU revision and variant, and combine them into a single numeric for
+ * easier comparison.
+ */
+ .globl cpu_get_rev_var
+func cpu_get_rev_var
+ mrs x1, midr_el1
 
-#if DEBUG
 /*
- * This function prints a warning message to the crash console
- * if the CPU revision/part number does not match the errata
- * workaround enabled in the build.
- * Clobber: x30, x0 - x5
+ * Extract the variant[23:20] and revision[3:0] from MIDR, and pack them
+ * as variant[7:4] and revision[3:0] of x0.
+ *
+ * First extract x1[23:16] to x0[7:0] and zero fill the rest. Then
+ * extract x1[3:0] into x0[3:0] retaining other bits.
 */
-.section .rodata.rev_warn_str, "aS"
-rev_warn_str:
- .asciz "Warning: Skipping Errata workaround for non matching CPU revision number.\n"
-
- .globl print_revision_warning
-func print_revision_warning
- mov x5, x30
- /* Ensure the console is initialized */
- bl plat_crash_console_init
- /* Check if the console is initialized */
- cbz x0, 1f
- /* The console is initialized */
- adr x4, rev_warn_str
- bl asm_print_str
-1:
- ret x5
-#endif
+ ubfx x0, x1, #(MIDR_VAR_SHIFT - MIDR_REV_BITS), #(MIDR_REV_BITS + MIDR_VAR_BITS)
+ bfxil x0, x1, #MIDR_REV_SHIFT, #MIDR_REV_BITS
+ ret
+endfunc cpu_get_rev_var
+/*
+ * Compare the CPU's revision-variant (x0) with a given value (x1), for errata
+ * application purposes. If the revision-variant is less than or the same as
+ * the given value, the erratum applies; otherwise it does not.
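+ *
+ * (Equivalent C, as a sketch:
+ *	return (rev_var <= test_rev) ? ERRATA_APPLIES : ERRATA_NOT_APPLIES;
+ * the csel below implements exactly this unsigned comparison.)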
+ */
+ .globl cpu_rev_var_ls
+func cpu_rev_var_ls
+ mov x2, #ERRATA_APPLIES
+ mov x3, #ERRATA_NOT_APPLIES
+ cmp x0, x1
+ csel x0, x2, x3, ls
+ ret
+endfunc cpu_rev_var_ls
+
+/*
+ * Compare the CPU's revision-variant (x0) with a given value (x1), for errata
+ * application purposes. If the revision-variant is higher than or the same as
+ * the given value, the erratum applies; otherwise it does not.
+ */
+ .globl cpu_rev_var_hs
+func cpu_rev_var_hs
+ mov x2, #ERRATA_APPLIES
+ mov x3, #ERRATA_NOT_APPLIES
+ cmp x0, x1
+ csel x0, x2, x3, hs
+ ret
+endfunc cpu_rev_var_hs
+
+#if REPORT_ERRATA
+/*
+ * void print_errata_status(void);
+ *
+ * Function to print errata status for CPUs of its class. Must be called only:
+ *
+ * - with the MMU and data caches enabled;
+ * - after cpu_ops have been initialized in per-CPU data.
+ */
+ .globl print_errata_status
+func print_errata_status
+#ifdef IMAGE_BL1
+ /*
+ * BL1 doesn't have per-CPU data. So retrieve the CPU operations
+ * directly.
+ */
+ stp xzr, x30, [sp, #-16]!
+ bl get_cpu_ops_ptr
+ ldp xzr, x30, [sp], #16
+ ldr x1, [x0, #CPU_ERRATA_FUNC]
+ cbnz x1, .Lprint
+#else
+ /*
+ * Retrieve pointer to cpu_ops from per-CPU data, and further, the
+ * errata printing function. If it's non-NULL, jump to the function in
+ * turn.
+ */
+ mrs x0, tpidr_el3
+ ldr x1, [x0, #CPU_DATA_CPU_OPS_PTR]
+ ldr x0, [x1, #CPU_ERRATA_FUNC]
+ cbz x0, .Lnoprint
+
+ /*
+ * Printing errata status requires atomically testing the printed flag.
+ */
+ stp x19, x30, [sp, #-16]!
+ mov x19, x0
+
+ /*
+ * Load pointers to errata lock and printed flag. Call
+ * errata_needs_reporting to check whether this CPU needs to report
+ * errata status pertaining to its class.
+ */
+ ldr x0, [x1, #CPU_ERRATA_LOCK]
+ ldr x1, [x1, #CPU_ERRATA_PRINTED]
+ bl errata_needs_reporting
+ mov x1, x19
+ ldp x19, x30, [sp], #16
+ cbnz x0, .Lprint
+#endif
+.Lnoprint:
+ ret
+.Lprint:
+ /* Jump to errata reporting function for this CPU */
+ br x1
+endfunc print_errata_status
+#endif
diff --git a/lib/cpus/aarch64/denver.S b/lib/cpus/aarch64/denver.S
new file mode 100644
index 00000000..a6225d4b
--- /dev/null
+++ b/lib/cpus/aarch64/denver.S
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2015-2016, ARM Limited and Contributors. All rights reserved.
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch.h> +#include <asm_macros.S> +#include <assert_macros.S> +#include <denver.h> +#include <cpu_macros.S> +#include <plat_macros.S> + + .global denver_disable_dco + + /* --------------------------------------------- + * Disable debug interfaces + * --------------------------------------------- + */ +func denver_disable_ext_debug + mov x0, #1 + msr osdlr_el1, x0 + isb + dsb sy + ret +endfunc denver_disable_ext_debug + + /* ---------------------------------------------------- + * Enable dynamic code optimizer (DCO) + * ---------------------------------------------------- + */ +func denver_enable_dco + mrs x0, mpidr_el1 + and x0, x0, #0xF + mov x1, #1 + lsl x1, x1, x0 + msr s3_0_c15_c0_2, x1 + ret +endfunc denver_enable_dco + + /* ---------------------------------------------------- + * Disable dynamic code optimizer (DCO) + * ---------------------------------------------------- + */ +func denver_disable_dco + + /* turn off background work */ + mrs x0, mpidr_el1 + and x0, x0, #0xF + mov x1, #1 + lsl x1, x1, x0 + lsl x2, x1, #16 + msr s3_0_c15_c0_2, x2 + isb + + /* wait till the background work turns off */ +1: mrs x2, s3_0_c15_c0_2 + lsr x2, x2, #32 + and w2, w2, 0xFFFF + and x2, x2, x1 + cbnz x2, 1b + + ret +endfunc denver_disable_dco + + /* ------------------------------------------------- + * The CPU Ops reset function for Denver. + * ------------------------------------------------- + */ +func denver_reset_func + + mov x19, x30 + + /* ---------------------------------------------------- + * Enable dynamic code optimizer (DCO) + * ---------------------------------------------------- + */ + bl denver_enable_dco + + ret x19 +endfunc denver_reset_func + + /* ---------------------------------------------------- + * The CPU Ops core power down function for Denver. + * ---------------------------------------------------- + */ +func denver_core_pwr_dwn + + mov x19, x30 + + /* --------------------------------------------- + * Force the debug interfaces to be quiescent + * --------------------------------------------- + */ + bl denver_disable_ext_debug + + ret x19 +endfunc denver_core_pwr_dwn + + /* ------------------------------------------------------- + * The CPU Ops cluster power down function for Denver. + * ------------------------------------------------------- + */ +func denver_cluster_pwr_dwn + ret +endfunc denver_cluster_pwr_dwn + + /* --------------------------------------------- + * This function provides Denver specific + * register information for crash reporting. + * It needs to return with x6 pointing to + * a list of register names in ascii and + * x8 - x15 having values of registers to be + * reported. 
+ * --------------------------------------------- + */ +.section .rodata.denver_regs, "aS" +denver_regs: /* The ascii list of register names to be reported */ + .asciz "actlr_el1", "" + +func denver_cpu_reg_dump + adr x6, denver_regs + mrs x8, ACTLR_EL1 + ret +endfunc denver_cpu_reg_dump + +declare_cpu_ops denver, DENVER_MIDR_PN0, \ + denver_reset_func, \ + denver_core_pwr_dwn, \ + denver_cluster_pwr_dwn + +declare_cpu_ops denver, DENVER_MIDR_PN1, \ + denver_reset_func, \ + denver_core_pwr_dwn, \ + denver_cluster_pwr_dwn + +declare_cpu_ops denver, DENVER_MIDR_PN2, \ + denver_reset_func, \ + denver_core_pwr_dwn, \ + denver_cluster_pwr_dwn + +declare_cpu_ops denver, DENVER_MIDR_PN3, \ + denver_reset_func, \ + denver_core_pwr_dwn, \ + denver_cluster_pwr_dwn + +declare_cpu_ops denver, DENVER_MIDR_PN4, \ + denver_reset_func, \ + denver_core_pwr_dwn, \ + denver_cluster_pwr_dwn diff --git a/lib/cpus/cpu-ops.mk b/lib/cpus/cpu-ops.mk index 1c5512e9..31adfb42 100644 --- a/lib/cpus/cpu-ops.mk +++ b/lib/cpus/cpu-ops.mk @@ -1,57 +1,170 @@ # -# Copyright (c) 2014, ARM Limited and Contributors. All rights reserved. +# Copyright (c) 2014-2017, ARM Limited and Contributors. All rights reserved. # -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# Neither the name of ARM nor the names of its contributors may be used -# to endorse or promote products derived from this software without specific -# prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. +# SPDX-License-Identifier: BSD-3-Clause # # Cortex A57 specific optimisation to skip L1 cache flush when # cluster is powered down. SKIP_A57_L1_FLUSH_PWR_DWN ?=0 +# Flag to disable the cache non-temporal hint. +# It is enabled by default. +A53_DISABLE_NON_TEMPORAL_HINT ?=1 + +# Flag to disable the cache non-temporal hint. +# It is enabled by default. 
+A57_DISABLE_NON_TEMPORAL_HINT	?=1
+
 # Process SKIP_A57_L1_FLUSH_PWR_DWN flag
 $(eval $(call assert_boolean,SKIP_A57_L1_FLUSH_PWR_DWN))
 $(eval $(call add_define,SKIP_A57_L1_FLUSH_PWR_DWN))

+# Process A53_DISABLE_NON_TEMPORAL_HINT flag
+$(eval $(call assert_boolean,A53_DISABLE_NON_TEMPORAL_HINT))
+$(eval $(call add_define,A53_DISABLE_NON_TEMPORAL_HINT))
+
+# Process A57_DISABLE_NON_TEMPORAL_HINT flag
+$(eval $(call assert_boolean,A57_DISABLE_NON_TEMPORAL_HINT))
+$(eval $(call add_define,A57_DISABLE_NON_TEMPORAL_HINT))
+
+
+# CPU Errata Build flags.
+# These should be enabled by the platform if the erratum workaround needs to be
+# applied.

-# CPU Errata Build flags. These should be enabled by the
-# platform if the errata needs to be applied.

+# Flag to apply erratum 826319 workaround during reset. This erratum applies
+# only to revision <= r0p2 of the Cortex A53 cpu.
+ERRATA_A53_826319	?=0

-# Flag to apply errata 806969 during reset. This errata applies only to
-# revision r0p0 of the Cortex A57 cpu.
+# Flag to apply erratum 835769 workaround at compile and link time. This
+# erratum applies to revision <= r0p4 of the Cortex A53 cpu. Enabling this
+# workaround can lead the linker to create "*.stub" sections.
+ERRATA_A53_835769	?=0
+
+# Flag to apply erratum 836870 workaround during reset. This erratum applies
+# only to revision <= r0p3 of the Cortex A53 cpu. From r0p4 and onwards, this
+# erratum workaround is enabled by default in hardware.
+ERRATA_A53_836870	?=0
+
+# Flag to apply erratum 843419 workaround at link time.
+# This erratum applies to revision <= r0p4 of the Cortex A53 cpu. Enabling this
+# workaround could lead the linker to emit "*.stub" sections which are 4kB
+# aligned.
+ERRATA_A53_843419	?=0
+
+# Flag to apply erratum 855873 workaround during reset. This erratum applies
+# to all revisions of the Cortex A53 CPU, but this firmware workaround only
+# works for revisions r0p3 and higher. Earlier revisions are taken care
+# of by the rich OS.
+ERRATA_A53_855873	?=0
+
+# Flag to apply erratum 806969 workaround during reset. This erratum applies
+# only to revision r0p0 of the Cortex A57 cpu.
 ERRATA_A57_806969	?=0

-# Flag to apply errata 813420 during reset. This errata applies only to
-# revision r0p0 of the Cortex A57 cpu.
+# Flag to apply erratum 813419 workaround during reset. This erratum applies
+# only to revision r0p0 of the Cortex A57 cpu.
+ERRATA_A57_813419	?=0
+
+# Flag to apply erratum 813420 workaround during reset. This erratum applies
+# only to revision r0p0 of the Cortex A57 cpu.
 ERRATA_A57_813420	?=0

+# Flag to apply erratum 826974 workaround during reset. This erratum applies
+# only to revision <= r1p1 of the Cortex A57 cpu.
+ERRATA_A57_826974	?=0
+
+# Flag to apply erratum 826977 workaround during reset. This erratum applies
+# only to revision <= r1p1 of the Cortex A57 cpu.
+ERRATA_A57_826977	?=0
+
+# Flag to apply erratum 828024 workaround during reset. This erratum applies
+# only to revision <= r1p1 of the Cortex A57 cpu.
+ERRATA_A57_828024	?=0
+
+# Flag to apply erratum 829520 workaround during reset. This erratum applies
+# only to revision <= r1p2 of the Cortex A57 cpu.
+ERRATA_A57_829520	?=0
+
+# Flag to apply erratum 833471 workaround during reset. This erratum applies
+# only to revision <= r1p2 of the Cortex A57 cpu.
+ERRATA_A57_833471	?=0
+
+# Flag to apply erratum 859972 workaround during reset. This erratum applies
+# only to revision <= r1p3 of the Cortex A57 cpu.
+ERRATA_A57_859972	?=0
+
+# Flag to apply erratum 859971 workaround during reset. This erratum applies
+# only to revision <= r0p3 of the Cortex A72 cpu.
+ERRATA_A72_859971	?=0
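Each of these flags is turned into a preprocessor define by the add_define calls below, and the CPU support code then gates the workaround on it. A hedged sketch of that pattern in C, not part of this patch: the workaround function and the chicken-bit helper are hypothetical, while the define and the r1p3 bound come from the flags above.

	#if ERRATA_A57_859972
	/* Hypothetical reset-time workaround, gated on the generated define. */
	static void errata_a57_859972_wa_sketch(uint32_t rev_var)
	{
		/* Erratum 859972 applies to Cortex-A57 <= r1p3, i.e. rev_var <= 0x13. */
		if (rev_var <= 0x13)
			set_hypothetical_chicken_bit();	/* placeholder helper */
	}
	#endif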
+
+# Process ERRATA_A53_826319 flag
+$(eval $(call assert_boolean,ERRATA_A53_826319))
+$(eval $(call add_define,ERRATA_A53_826319))
+
+# Process ERRATA_A53_835769 flag
+$(eval $(call assert_boolean,ERRATA_A53_835769))
+$(eval $(call add_define,ERRATA_A53_835769))
+
+# Process ERRATA_A53_836870 flag
+$(eval $(call assert_boolean,ERRATA_A53_836870))
+$(eval $(call add_define,ERRATA_A53_836870))
+
+# Process ERRATA_A53_843419 flag
+$(eval $(call assert_boolean,ERRATA_A53_843419))
+$(eval $(call add_define,ERRATA_A53_843419))
+
+# Process ERRATA_A53_855873 flag
+$(eval $(call assert_boolean,ERRATA_A53_855873))
+$(eval $(call add_define,ERRATA_A53_855873))
+
 # Process ERRATA_A57_806969 flag
 $(eval $(call assert_boolean,ERRATA_A57_806969))
 $(eval $(call add_define,ERRATA_A57_806969))

+# Process ERRATA_A57_813419 flag
+$(eval $(call assert_boolean,ERRATA_A57_813419))
+$(eval $(call add_define,ERRATA_A57_813419))
+
 # Process ERRATA_A57_813420 flag
 $(eval $(call assert_boolean,ERRATA_A57_813420))
 $(eval $(call add_define,ERRATA_A57_813420))
+
+# Process ERRATA_A57_826974 flag
+$(eval $(call assert_boolean,ERRATA_A57_826974))
+$(eval $(call add_define,ERRATA_A57_826974))
+
+# Process ERRATA_A57_826977 flag
+$(eval $(call assert_boolean,ERRATA_A57_826977))
+$(eval $(call add_define,ERRATA_A57_826977))
+
+# Process ERRATA_A57_828024 flag
+$(eval $(call assert_boolean,ERRATA_A57_828024))
+$(eval $(call add_define,ERRATA_A57_828024))
+
+# Process ERRATA_A57_829520 flag
+$(eval $(call assert_boolean,ERRATA_A57_829520))
+$(eval $(call add_define,ERRATA_A57_829520))
+
+# Process ERRATA_A57_833471 flag
+$(eval $(call assert_boolean,ERRATA_A57_833471))
+$(eval $(call add_define,ERRATA_A57_833471))
+
+# Process ERRATA_A57_859972 flag
+$(eval $(call assert_boolean,ERRATA_A57_859972))
+$(eval $(call add_define,ERRATA_A57_859972))
+
+# Process ERRATA_A72_859971 flag
+$(eval $(call assert_boolean,ERRATA_A72_859971))
+$(eval $(call add_define,ERRATA_A72_859971))
+
+# Errata build flags
+ifneq (${ERRATA_A53_843419},0)
+TF_LDFLAGS_aarch64	+= --fix-cortex-a53-843419
+endif
+
+ifneq (${ERRATA_A53_835769},0)
+TF_CFLAGS_aarch64	+= -mfix-cortex-a53-835769
+TF_LDFLAGS_aarch64	+= --fix-cortex-a53-835769
+endif
diff --git a/lib/cpus/errata_report.c b/lib/cpus/errata_report.c
new file mode 100644
index 00000000..8d9f704a
--- /dev/null
+++ b/lib/cpus/errata_report.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+/* Runtime firmware routines to report errata status for the current CPU. */
+
+#include <arch_helpers.h>
+#include <assert.h>
+#include <cpu_data.h>
+#include <debug.h>
+#include <errata_report.h>
+#include <spinlock.h>
+#include <utils.h>
+
+#ifdef IMAGE_BL1
+# define BL_STRING	"BL1"
+#elif defined(AARCH64) && defined(IMAGE_BL31)
+# define BL_STRING	"BL31"
+#elif defined(AARCH32) && defined(IMAGE_BL32)
+# define BL_STRING	"BL32"
+#else
+# error This image should not be printing errata status
+#endif
+
+/* Errata format: BL stage, CPU, errata ID, message */
+#define ERRATA_FORMAT	"%s: %s: errata workaround for %s was %s\n"
+
+/*
+ * Returns whether errata needs to be reported. Passed arguments are private to
+ * a CPU type.
+ */
+int errata_needs_reporting(spinlock_t *lock, uint32_t *reported)
+{
+	int report_now;
+
+	/* If already reported, return false. */
+	if (*reported)
+		return 0;
+
+	/*
+	 * Acquire the lock, determine whether the status needs reporting, and
+	 * then mark it as reported.
+	 */
+	spin_lock(lock);
+	report_now = !(*reported);
+	if (report_now)
+		*reported = 1;
+	spin_unlock(lock);
+
+	return report_now;
+}
+
+/*
+ * Print errata status message.
+ *
+ * Unknown: WARN
+ * Missing: WARN
+ * Applied: INFO
+ * Not applied: VERBOSE
+ */
+void errata_print_msg(unsigned int status, const char *cpu, const char *id)
+{
+	/* Errata status strings */
+	static const char *const errata_status_str[] = {
+		[ERRATA_NOT_APPLIES] = "not applied",
+		[ERRATA_APPLIES] = "applied",
+		[ERRATA_MISSING] = "missing!"
+	};
+	static const char *const __unused bl_str = BL_STRING;
+	const char *msg __unused;
+
+	assert(status < ARRAY_SIZE(errata_status_str));
+	assert(cpu);
+	assert(id);
+
+	msg = errata_status_str[status];
+
+	switch (status) {
+	case ERRATA_NOT_APPLIES:
+		VERBOSE(ERRATA_FORMAT, bl_str, cpu, id, msg);
+		break;
+
+	case ERRATA_APPLIES:
+		INFO(ERRATA_FORMAT, bl_str, cpu, id, msg);
+		break;
+
+	case ERRATA_MISSING:
+		WARN(ERRATA_FORMAT, bl_str, cpu, id, msg);
+		break;
+
+	default:
+		WARN(ERRATA_FORMAT, bl_str, cpu, id, "unknown");
+		break;
+	}
+}
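The pair of helpers above is meant to be driven from a CPU class's errata-report hook: errata_needs_reporting() gates a one-time report per class, and errata_print_msg() formats it. A hedged sketch, assuming a hypothetical Cortex-A57 reporting function with illustrative lock, flag, and erratum-ID values (real code keeps the lock and flag next to the cpu_ops):

	/* Hypothetical per-class state for illustration. */
	static spinlock_t a57_errata_lock;
	static uint32_t a57_errata_reported;

	void a57_errata_report_sketch(void)
	{
		/* Report at most once per CPU class, under the lock. */
		if (errata_needs_reporting(&a57_errata_lock, &a57_errata_reported))
			errata_print_msg(ERRATA_APPLIES, "cortex_a57", "859972");
	}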
diff --git a/lib/el3_runtime/aarch32/context_mgmt.c b/lib/el3_runtime/aarch32/context_mgmt.c
new file mode 100644
index 00000000..3e7a5b73
--- /dev/null
+++ b/lib/el3_runtime/aarch32/context_mgmt.c
@@ -0,0 +1,287 @@
+/*
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <arch_helpers.h>
+#include <assert.h>
+#include <bl_common.h>
+#include <context.h>
+#include <context_mgmt.h>
+#include <platform.h>
+#include <platform_def.h>
+#include <smcc_helpers.h>
+#include <string.h>
+#include <utils.h>
+
+/*******************************************************************************
+ * Context management library initialisation routine. This library is used by
+ * runtime services to share pointers to 'cpu_context' structures for the secure
+ * and non-secure states. Management of the structures and their associated
+ * memory is not done by the context management library e.g. the PSCI service
+ * manages the cpu context used for entry from and exit to the non-secure state.
+ * The Secure payload manages the context(s) corresponding to the secure state.
+ * It also uses this library to get access to the non-secure
+ * state cpu context pointers.
+ ******************************************************************************/
+void cm_init(void)
+{
+	/*
+	 * The context management library has only global data to initialize, but
+	 * that will be done when the BSS is zeroed out
+	 */
+}
+
+/*******************************************************************************
+ * The following function initializes the cpu_context 'ctx' for
+ * first use, and sets the initial entrypoint state as specified by the
+ * entry_point_info structure.
+ *
+ * The security state to initialize is determined by the SECURE attribute
+ * of the entry_point_info. The function returns a pointer to the initialized
+ * context and sets this as the next context to return to.
+ *
+ * The EE and ST attributes are used to configure the endianness and secure
+ * timer availability for the new execution context.
+ *
+ * To prepare the register state for entry call cm_prepare_el3_exit() and
+ * el3_exit(). For Secure-EL1 cm_prepare_el3_exit() is equivalent to
+ * cm_el1_sysregs_context_restore().
+ ******************************************************************************/
+static void cm_init_context_common(cpu_context_t *ctx, const entry_point_info_t *ep)
+{
+	unsigned int security_state;
+	uint32_t scr, sctlr;
+	regs_t *reg_ctx;
+
+	assert(ctx);
+
+	security_state = GET_SECURITY_STATE(ep->h.attr);
+
+	/* Clear any residual register values from the context */
+	zeromem(ctx, sizeof(*ctx));
+
+	reg_ctx = get_regs_ctx(ctx);
+
+	/*
+	 * Base the context SCR on the current value, adjust for entry point
+	 * specific requirements
+	 */
+	scr = read_scr();
+	scr &= ~(SCR_NS_BIT | SCR_HCE_BIT);
+
+	if (security_state != SECURE)
+		scr |= SCR_NS_BIT;
+
+	if (security_state != SECURE) {
+		/*
+		 * Set up SCTLR for the Non-secure context.
+		 *
+		 * SCTLR.EE: Endianness is taken from the entrypoint attributes.
+		 *
+		 * SCTLR.M, SCTLR.C and SCTLR.I: These fields must be zero (as
+		 * required by PSCI specification)
+		 *
+		 * Set remaining SCTLR fields to their architecturally defined
+		 * values. Some fields reset to an IMPLEMENTATION DEFINED value:
+		 *
+		 * SCTLR.TE: Set to zero so that exceptions to an Exception
+		 * Level executing at PL1 are taken to A32 state.
+		 *
+		 * SCTLR.V: Set to zero to select the normal exception vectors
+		 * with base address held in VBAR.
+		 */
+		assert(((ep->spsr >> SPSR_E_SHIFT) & SPSR_E_MASK) ==
+			(EP_GET_EE(ep->h.attr) >> EP_EE_SHIFT));
+
+		sctlr = EP_GET_EE(ep->h.attr) ? SCTLR_EE_BIT : 0;
+		sctlr |= (SCTLR_RESET_VAL & ~(SCTLR_TE_BIT | SCTLR_V_BIT));
+		write_ctx_reg(reg_ctx, CTX_NS_SCTLR, sctlr);
+	}
+
+	/*
+	 * The target exception level is based on the spsr mode requested. If
+	 * execution is requested to hyp mode, HVC is enabled via SCR.HCE.
+	 */
+	if (GET_M32(ep->spsr) == MODE32_hyp)
+		scr |= SCR_HCE_BIT;
+
+	/*
+	 * Store the initialised values for SCTLR and SCR in the cpu_context.
+	 * The Hyp mode registers are not part of the saved context and are
+	 * set-up in cm_prepare_el3_exit().
+	 */
+	write_ctx_reg(reg_ctx, CTX_SCR, scr);
+	write_ctx_reg(reg_ctx, CTX_LR, ep->pc);
+	write_ctx_reg(reg_ctx, CTX_SPSR, ep->spsr);
+
+	/*
+	 * Store the r0-r3 values from the entrypoint into the context
+	 * Use memcpy as we are in control of the layout of the structures
+	 */
+	memcpy((void *)reg_ctx, (void *)&ep->args, sizeof(aapcs32_params_t));
+}
+
+/*******************************************************************************
+ * The following function initializes the cpu_context for a CPU specified by
+ * its `cpu_idx` for first use, and sets the initial entrypoint state as
+ * specified by the entry_point_info structure.
+ ******************************************************************************/
+void cm_init_context_by_index(unsigned int cpu_idx,
+			      const entry_point_info_t *ep)
+{
+	cpu_context_t *ctx;
+	ctx = cm_get_context_by_index(cpu_idx, GET_SECURITY_STATE(ep->h.attr));
+	cm_init_context_common(ctx, ep);
+}
+
+/*******************************************************************************
+ * The following function initializes the cpu_context for the current CPU
+ * for first use, and sets the initial entrypoint state as specified by the
+ * entry_point_info structure.
+ ******************************************************************************/
+void cm_init_my_context(const entry_point_info_t *ep)
+{
+	cpu_context_t *ctx;
+	ctx = cm_get_context(GET_SECURITY_STATE(ep->h.attr));
+	cm_init_context_common(ctx, ep);
+}
+
+/*******************************************************************************
+ * Prepare the CPU system registers for first entry into secure or normal world
+ *
+ * If execution is requested to hyp mode, HSCTLR is initialized
+ * If execution is requested to non-secure PL1, and the CPU supports
+ * HYP mode then HYP mode is disabled by configuring all necessary HYP mode
+ * registers.
+ ******************************************************************************/
+void cm_prepare_el3_exit(uint32_t security_state)
+{
+	uint32_t hsctlr, scr;
+	cpu_context_t *ctx = cm_get_context(security_state);
+
+	assert(ctx);
+
+	if (security_state == NON_SECURE) {
+		scr = read_ctx_reg(get_regs_ctx(ctx), CTX_SCR);
+		if (scr & SCR_HCE_BIT) {
+			/* Use SCTLR value to initialize HSCTLR */
+			hsctlr = read_ctx_reg(get_regs_ctx(ctx),
+					      CTX_NS_SCTLR);
+			hsctlr |= HSCTLR_RES1;
+			/* Temporarily set the NS bit to access HSCTLR */
+			write_scr(read_scr() | SCR_NS_BIT);
+			/*
+			 * Make sure the write to SCR is complete so that
+			 * we can access HSCTLR
+			 */
+			isb();
+			write_hsctlr(hsctlr);
+			isb();
+
+			write_scr(read_scr() & ~SCR_NS_BIT);
+			isb();
+		} else if (read_id_pfr1() &
+			   (ID_PFR1_VIRTEXT_MASK << ID_PFR1_VIRTEXT_SHIFT)) {
+			/*
+			 * Set the NS bit to access NS copies of certain banked
+			 * registers
+			 */
+			write_scr(read_scr() | SCR_NS_BIT);
+			isb();
+
+			/*
+			 * Hyp / PL2 present but unused, need to disable safely.
+			 * HSCTLR can be ignored in this case.
+			 *
+			 * Set HCR to its architectural reset value so that
+			 * Non-secure operations do not trap to Hyp mode.
+			 */
+			write_hcr(HCR_RESET_VAL);
+
+			/*
+			 * Set HCPTR to its architectural reset value so that
+			 * Non-secure access from EL1 or EL0 to trace and to
+			 * Advanced SIMD and floating point functionality does
+			 * not trap to Hyp mode.
+			 */
+			write_hcptr(HCPTR_RESET_VAL);
+
+			/*
+			 * Initialise CNTHCTL. All fields are architecturally
+			 * UNKNOWN on reset and are set to zero except for
+			 * field(s) listed below.
+			 *
+			 * CNTHCTL.PL1PCEN: Disable traps to Hyp mode of
+			 * Non-secure EL0 and EL1 accesses to the physical
+			 * timer registers.
+			 *
+			 * CNTHCTL.PL1PCTEN: Disable traps to Hyp mode of
+			 * Non-secure EL0 and EL1 accesses to the physical
+			 * counter registers.
+			 */
+			write_cnthctl(CNTHCTL_RESET_VAL |
+					PL1PCEN_BIT | PL1PCTEN_BIT);
+
+			/*
+			 * Initialise CNTVOFF to zero as it resets to an
+			 * IMPLEMENTATION DEFINED value.
+			 */
+			write64_cntvoff(0);
+
+			/*
+			 * Set VPIDR and VMPIDR to match MIDR_EL1 and MPIDR
+			 * respectively.
+			 */
+			write_vpidr(read_midr());
+			write_vmpidr(read_mpidr());
+
+			/*
+			 * Initialise VTTBR, setting all fields rather than
+			 * relying on the hw. Some fields are architecturally
+			 * UNKNOWN at reset.
+			 *
+			 * VTTBR.VMID: Set to zero which is the architecturally
+			 * defined reset value. Even though EL1&0 stage 2
+			 * address translation is disabled, cache maintenance
+			 * operations depend on the VMID.
+			 *
+			 * VTTBR.BADDR: Set to zero as EL1&0 stage 2 address
+			 * translation is disabled.
+			 */
+			write64_vttbr(VTTBR_RESET_VAL &
+				~((VTTBR_VMID_MASK << VTTBR_VMID_SHIFT)
+				| (VTTBR_BADDR_MASK << VTTBR_BADDR_SHIFT)));
+
+			/*
+			 * Initialise HDCR, setting all the fields rather than
+			 * relying on hw.
+			 *
+			 * HDCR.HPMN: Set to value of PMCR.N which is the
+			 * architecturally-defined reset value.
+			 */
+			write_hdcr(HDCR_RESET_VAL |
+				   ((read_pmcr() & PMCR_N_BITS) >> PMCR_N_SHIFT));
+
+			/*
+			 * Set HSTR to its architectural reset value so that
+			 * accesses to system registers in the coproc=1111
+			 * encoding space do not trap to Hyp mode.
+			 */
+			write_hstr(HSTR_RESET_VAL);
+			/*
+			 * Set CNTHP_CTL to its architectural reset value to
+			 * disable the EL2 physical timer and prevent timer
+			 * interrupts. Some fields are architecturally UNKNOWN
+			 * on reset and are set to zero.
+			 */
+			write_cnthp_ctl(CNTHP_CTL_RESET_VAL);
+			isb();
+
+			write_scr(read_scr() & ~SCR_NS_BIT);
+			isb();
+		}
+	}
+}
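The "else if" arm above only runs when Hyp/PL2 is implemented but will not be used, which the code detects from the virtualization extensions field of ID_PFR1. A standalone C sketch of that predicate, illustrative only: the helper name is hypothetical, while the accessor and field macros come from this patch set.

	/* Hypothetical predicate mirroring the ID_PFR1 check above. */
	static int hyp_mode_implemented_sketch(void)
	{
		/* A non-zero virtualization field means Hyp/PL2 is implemented. */
		return (read_id_pfr1() >> ID_PFR1_VIRTEXT_SHIFT) &
			ID_PFR1_VIRTEXT_MASK;
	}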
diff --git a/lib/el3_runtime/aarch32/cpu_data.S b/lib/el3_runtime/aarch32/cpu_data.S
new file mode 100644
index 00000000..68d64151
--- /dev/null
+++ b/lib/el3_runtime/aarch32/cpu_data.S
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <asm_macros.S>
+#include <cpu_data.h>
+
+	.globl	_cpu_data
+	.globl	_cpu_data_by_index
+
+/* -----------------------------------------------------------------
+ * cpu_data_t *_cpu_data(void)
+ *
+ * Return the cpu_data structure for the current CPU.
+ * -----------------------------------------------------------------
+ */
+func _cpu_data
+	/* r12 is pushed to meet the 8 byte stack alignment requirement */
+	push	{r12, lr}
+	bl	plat_my_core_pos
+	pop	{r12, lr}
+	b	_cpu_data_by_index
+endfunc _cpu_data
+
+/* -----------------------------------------------------------------
+ * cpu_data_t *_cpu_data_by_index(uint32_t cpu_index)
+ *
+ * Return the cpu_data structure for the CPU with given linear index
+ *
+ * This can be called without a valid stack.
+ * clobbers: r0, r1
+ * -----------------------------------------------------------------
+ */
+func _cpu_data_by_index
+	mov_imm	r1, CPU_DATA_SIZE
+	mul	r0, r0, r1
+	ldr	r1, =percpu_data
+	add	r0, r0, r1
+	bx	lr
+endfunc _cpu_data_by_index
diff --git a/lib/el3_runtime/aarch64/context.S b/lib/el3_runtime/aarch64/context.S
new file mode 100644
index 00000000..db16a9f0
--- /dev/null
+++ b/lib/el3_runtime/aarch64/context.S
@@ -0,0 +1,418 @@
+/*
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <asm_macros.S>
+#include <context.h>
+
+	.global	el1_sysregs_context_save
+	.global	el1_sysregs_context_save_post_ops
+	.global	el1_sysregs_context_restore
+#if CTX_INCLUDE_FPREGS
+	.global	fpregs_context_save
+	.global	fpregs_context_restore
+#endif
+	.global	save_gp_registers
+	.global	restore_gp_registers_eret
+	.global	restore_gp_registers_callee_eret
+	.global	el3_exit
+
+/* -----------------------------------------------------
+ * The following function strictly follows the AArch64
+ * PCS to use x9-x17 (temporary caller-saved registers)
+ * to save EL1 system register context. It assumes that
+ * 'x0' is pointing to a 'el1_sys_regs' structure where
+ * the register context will be saved.
+ * ----------------------------------------------------- + */ +func el1_sysregs_context_save + + mrs x9, spsr_el1 + mrs x10, elr_el1 + stp x9, x10, [x0, #CTX_SPSR_EL1] + + mrs x15, sctlr_el1 + mrs x16, actlr_el1 + stp x15, x16, [x0, #CTX_SCTLR_EL1] + + mrs x17, cpacr_el1 + mrs x9, csselr_el1 + stp x17, x9, [x0, #CTX_CPACR_EL1] + + mrs x10, sp_el1 + mrs x11, esr_el1 + stp x10, x11, [x0, #CTX_SP_EL1] + + mrs x12, ttbr0_el1 + mrs x13, ttbr1_el1 + stp x12, x13, [x0, #CTX_TTBR0_EL1] + + mrs x14, mair_el1 + mrs x15, amair_el1 + stp x14, x15, [x0, #CTX_MAIR_EL1] + + mrs x16, tcr_el1 + mrs x17, tpidr_el1 + stp x16, x17, [x0, #CTX_TCR_EL1] + + mrs x9, tpidr_el0 + mrs x10, tpidrro_el0 + stp x9, x10, [x0, #CTX_TPIDR_EL0] + + mrs x13, par_el1 + mrs x14, far_el1 + stp x13, x14, [x0, #CTX_PAR_EL1] + + mrs x15, afsr0_el1 + mrs x16, afsr1_el1 + stp x15, x16, [x0, #CTX_AFSR0_EL1] + + mrs x17, contextidr_el1 + mrs x9, vbar_el1 + stp x17, x9, [x0, #CTX_CONTEXTIDR_EL1] + + mrs x10, pmcr_el0 + str x10, [x0, #CTX_PMCR_EL0] + + /* Save AArch32 system registers if the build has instructed so */ +#if CTX_INCLUDE_AARCH32_REGS + mrs x11, spsr_abt + mrs x12, spsr_und + stp x11, x12, [x0, #CTX_SPSR_ABT] + + mrs x13, spsr_irq + mrs x14, spsr_fiq + stp x13, x14, [x0, #CTX_SPSR_IRQ] + + mrs x15, dacr32_el2 + mrs x16, ifsr32_el2 + stp x15, x16, [x0, #CTX_DACR32_EL2] + + mrs x17, fpexc32_el2 + str x17, [x0, #CTX_FP_FPEXC32_EL2] +#endif + + /* Save NS timer registers if the build has instructed so */ +#if NS_TIMER_SWITCH + mrs x10, cntp_ctl_el0 + mrs x11, cntp_cval_el0 + stp x10, x11, [x0, #CTX_CNTP_CTL_EL0] + + mrs x12, cntv_ctl_el0 + mrs x13, cntv_cval_el0 + stp x12, x13, [x0, #CTX_CNTV_CTL_EL0] + + mrs x14, cntkctl_el1 + str x14, [x0, #CTX_CNTKCTL_EL1] +#endif + + ret +endfunc el1_sysregs_context_save + +/* ----------------------------------------------------- + * The following function strictly follows the AArch64 + * PCS to use x9-x17 (temporary caller-saved registers) + * to do post operations after saving the EL1 system + * register context. + * ----------------------------------------------------- + */ +func el1_sysregs_context_save_post_ops +#if ENABLE_SPE_FOR_LOWER_ELS + /* Detect if SPE is implemented */ + mrs x9, id_aa64dfr0_el1 + ubfx x9, x9, #ID_AA64DFR0_PMS_SHIFT, #ID_AA64DFR0_PMS_LENGTH + cmp x9, #0x1 + b.ne 1f + + /* + * Before switching from normal world to secure world + * the profiling buffers need to be drained out to memory. This is + * required to avoid an invalid memory access when TTBR is switched + * for entry to SEL1. + */ + .arch armv8.2-a+profile + psb csync + dsb nsh + .arch armv8-a +1: +#endif + ret +endfunc el1_sysregs_context_save_post_ops + +/* ----------------------------------------------------- + * The following function strictly follows the AArch64 + * PCS to use x9-x17 (temporary caller-saved registers) + * to restore EL1 system register context. 
It assumes
+ * that 'x0' is pointing to a 'el1_sys_regs' structure
+ * from where the register context will be restored
+ * -----------------------------------------------------
+ */
+func el1_sysregs_context_restore
+
+	ldp	x9, x10, [x0, #CTX_SPSR_EL1]
+	msr	spsr_el1, x9
+	msr	elr_el1, x10
+
+	ldp	x15, x16, [x0, #CTX_SCTLR_EL1]
+	msr	sctlr_el1, x15
+	msr	actlr_el1, x16
+
+	ldp	x17, x9, [x0, #CTX_CPACR_EL1]
+	msr	cpacr_el1, x17
+	msr	csselr_el1, x9
+
+	ldp	x10, x11, [x0, #CTX_SP_EL1]
+	msr	sp_el1, x10
+	msr	esr_el1, x11
+
+	ldp	x12, x13, [x0, #CTX_TTBR0_EL1]
+	msr	ttbr0_el1, x12
+	msr	ttbr1_el1, x13
+
+	ldp	x14, x15, [x0, #CTX_MAIR_EL1]
+	msr	mair_el1, x14
+	msr	amair_el1, x15
+
+	ldp	x16, x17, [x0, #CTX_TCR_EL1]
+	msr	tcr_el1, x16
+	msr	tpidr_el1, x17
+
+	ldp	x9, x10, [x0, #CTX_TPIDR_EL0]
+	msr	tpidr_el0, x9
+	msr	tpidrro_el0, x10
+
+	ldp	x13, x14, [x0, #CTX_PAR_EL1]
+	msr	par_el1, x13
+	msr	far_el1, x14
+
+	ldp	x15, x16, [x0, #CTX_AFSR0_EL1]
+	msr	afsr0_el1, x15
+	msr	afsr1_el1, x16
+
+	ldp	x17, x9, [x0, #CTX_CONTEXTIDR_EL1]
+	msr	contextidr_el1, x17
+	msr	vbar_el1, x9
+
+	ldr	x10, [x0, #CTX_PMCR_EL0]
+	msr	pmcr_el0, x10
+
+	/* Restore AArch32 system registers if the build has instructed so */
+#if CTX_INCLUDE_AARCH32_REGS
+	ldp	x11, x12, [x0, #CTX_SPSR_ABT]
+	msr	spsr_abt, x11
+	msr	spsr_und, x12
+
+	ldp	x13, x14, [x0, #CTX_SPSR_IRQ]
+	msr	spsr_irq, x13
+	msr	spsr_fiq, x14
+
+	ldp	x15, x16, [x0, #CTX_DACR32_EL2]
+	msr	dacr32_el2, x15
+	msr	ifsr32_el2, x16
+
+	ldr	x17, [x0, #CTX_FP_FPEXC32_EL2]
+	msr	fpexc32_el2, x17
+#endif
+	/* Restore NS timer registers if the build has instructed so */
+#if NS_TIMER_SWITCH
+	ldp	x10, x11, [x0, #CTX_CNTP_CTL_EL0]
+	msr	cntp_ctl_el0, x10
+	msr	cntp_cval_el0, x11
+
+	ldp	x12, x13, [x0, #CTX_CNTV_CTL_EL0]
+	msr	cntv_ctl_el0, x12
+	msr	cntv_cval_el0, x13
+
+	ldr	x14, [x0, #CTX_CNTKCTL_EL1]
+	msr	cntkctl_el1, x14
+#endif
+
+	/* No explicit ISB required here as ERET covers it */
+	ret
+endfunc el1_sysregs_context_restore
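These save/restore primitives are not called directly by runtime services; the C wrappers cm_el1_sysregs_context_save() and cm_el1_sysregs_context_restore(), added later in this patch, drive them. A hedged sketch of a world switch built from those wrappers (the function name is hypothetical; the called APIs and SECURE/NON_SECURE constants are from this patch set):

	/* Illustrative world-switch sequence; an SPD would do this around an
	 * SMC into the secure payload. */
	static void enter_secure_world_sketch(void)
	{
		cm_el1_sysregs_context_save(NON_SECURE);	/* stash NS EL1 state */
		cm_el1_sysregs_context_restore(SECURE);		/* load secure EL1 state */
		cm_set_next_eret_context(SECURE);		/* SP_EL3 -> secure context */
		/* el3_exit (below) then restores GP registers and ERETs. */
	}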
+
+/* -----------------------------------------------------
+ * The following function follows the aapcs_64 strictly
+ * to use x9-x17 (temporary caller-saved registers
+ * according to AArch64 PCS) to save floating point
+ * register context. It assumes that 'x0' is pointing to
+ * a 'fp_regs' structure where the register context will
+ * be saved.
+ *
+ * Access to VFP registers will trap if CPTR_EL3.TFP is
+ * set. However, we currently neither use VFP registers
+ * nor set traps in Trusted Firmware, so the bit is
+ * assumed to be clear.
+ *
+ * TODO: Revisit when VFP is used in secure world
+ * -----------------------------------------------------
+ */
+#if CTX_INCLUDE_FPREGS
+func fpregs_context_save
+	stp	q0, q1, [x0, #CTX_FP_Q0]
+	stp	q2, q3, [x0, #CTX_FP_Q2]
+	stp	q4, q5, [x0, #CTX_FP_Q4]
+	stp	q6, q7, [x0, #CTX_FP_Q6]
+	stp	q8, q9, [x0, #CTX_FP_Q8]
+	stp	q10, q11, [x0, #CTX_FP_Q10]
+	stp	q12, q13, [x0, #CTX_FP_Q12]
+	stp	q14, q15, [x0, #CTX_FP_Q14]
+	stp	q16, q17, [x0, #CTX_FP_Q16]
+	stp	q18, q19, [x0, #CTX_FP_Q18]
+	stp	q20, q21, [x0, #CTX_FP_Q20]
+	stp	q22, q23, [x0, #CTX_FP_Q22]
+	stp	q24, q25, [x0, #CTX_FP_Q24]
+	stp	q26, q27, [x0, #CTX_FP_Q26]
+	stp	q28, q29, [x0, #CTX_FP_Q28]
+	stp	q30, q31, [x0, #CTX_FP_Q30]
+
+	mrs	x9, fpsr
+	str	x9, [x0, #CTX_FP_FPSR]
+
+	mrs	x10, fpcr
+	str	x10, [x0, #CTX_FP_FPCR]
+
+	ret
+endfunc fpregs_context_save
+
+/* -----------------------------------------------------
+ * The following function follows the aapcs_64 strictly
+ * to use x9-x17 (temporary caller-saved registers
+ * according to AArch64 PCS) to restore floating point
+ * register context. It assumes that 'x0' is pointing to
+ * a 'fp_regs' structure from where the register context
+ * will be restored.
+ *
+ * Access to VFP registers will trap if CPTR_EL3.TFP is
+ * set. However, we currently neither use VFP registers
+ * nor set traps in Trusted Firmware, so the bit is
+ * assumed to be clear.
+ *
+ * TODO: Revisit when VFP is used in secure world
+ * -----------------------------------------------------
+ */
+func fpregs_context_restore
+	ldp	q0, q1, [x0, #CTX_FP_Q0]
+	ldp	q2, q3, [x0, #CTX_FP_Q2]
+	ldp	q4, q5, [x0, #CTX_FP_Q4]
+	ldp	q6, q7, [x0, #CTX_FP_Q6]
+	ldp	q8, q9, [x0, #CTX_FP_Q8]
+	ldp	q10, q11, [x0, #CTX_FP_Q10]
+	ldp	q12, q13, [x0, #CTX_FP_Q12]
+	ldp	q14, q15, [x0, #CTX_FP_Q14]
+	ldp	q16, q17, [x0, #CTX_FP_Q16]
+	ldp	q18, q19, [x0, #CTX_FP_Q18]
+	ldp	q20, q21, [x0, #CTX_FP_Q20]
+	ldp	q22, q23, [x0, #CTX_FP_Q22]
+	ldp	q24, q25, [x0, #CTX_FP_Q24]
+	ldp	q26, q27, [x0, #CTX_FP_Q26]
+	ldp	q28, q29, [x0, #CTX_FP_Q28]
+	ldp	q30, q31, [x0, #CTX_FP_Q30]
+
+	ldr	x9, [x0, #CTX_FP_FPSR]
+	msr	fpsr, x9
+
+	ldr	x10, [x0, #CTX_FP_FPCR]
+	msr	fpcr, x10
+
+	/*
+	 * No explicit ISB required here as ERET to
+	 * switch to secure EL1 or non-secure world
+	 * covers it
+	 */
+
+	ret
+endfunc fpregs_context_restore
+#endif /* CTX_INCLUDE_FPREGS */
+
+/* -----------------------------------------------------
+ * The following functions are used to save and restore
+ * all the general purpose registers. Ideally we would
+ * only save and restore the callee saved registers when
+ * a world switch occurs but that type of implementation
+ * is more complex. So currently we will always save and
+ * restore these registers on entry and exit of EL3.
+ * These are not macros to ensure their invocation fits
+ * within the 32 instructions per exception vector.
+ * clobbers: x18 + * ----------------------------------------------------- + */ +func save_gp_registers + stp x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0] + stp x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2] + stp x4, x5, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X4] + stp x6, x7, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X6] + stp x8, x9, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X8] + stp x10, x11, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X10] + stp x12, x13, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X12] + stp x14, x15, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X14] + stp x16, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X16] + stp x18, x19, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X18] + stp x20, x21, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X20] + stp x22, x23, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X22] + stp x24, x25, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X24] + stp x26, x27, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X26] + stp x28, x29, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X28] + mrs x18, sp_el0 + str x18, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_SP_EL0] + ret +endfunc save_gp_registers + +func restore_gp_registers_eret + ldp x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0] + ldp x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2] + b restore_gp_registers_callee_eret +endfunc restore_gp_registers_eret + +func restore_gp_registers_callee_eret + ldp x4, x5, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X4] + ldp x6, x7, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X6] + ldp x8, x9, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X8] + ldp x10, x11, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X10] + ldp x12, x13, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X12] + ldp x14, x15, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X14] + ldp x18, x19, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X18] + ldp x20, x21, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X20] + ldp x22, x23, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X22] + ldp x24, x25, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X24] + ldp x26, x27, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X26] + ldp x28, x29, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X28] + ldp x30, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR] + msr sp_el0, x17 + ldp x16, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X16] + eret +endfunc restore_gp_registers_callee_eret + + /* ----------------------------------------------------- + * This routine assumes that the SP_EL3 is pointing to + * a valid context structure from where the gp regs and + * other special registers can be retrieved. + * ----------------------------------------------------- + */ +func el3_exit + /* ----------------------------------------------------- + * Save the current SP_EL0 i.e. the EL3 runtime stack + * which will be used for handling the next SMC. Then + * switch to SP_EL3 + * ----------------------------------------------------- + */ + mov x17, sp + msr spsel, #1 + str x17, [sp, #CTX_EL3STATE_OFFSET + CTX_RUNTIME_SP] + + /* ----------------------------------------------------- + * Restore SPSR_EL3, ELR_EL3 and SCR_EL3 prior to ERET + * ----------------------------------------------------- + */ + ldr x18, [sp, #CTX_EL3STATE_OFFSET + CTX_SCR_EL3] + ldp x16, x17, [sp, #CTX_EL3STATE_OFFSET + CTX_SPSR_EL3] + msr scr_el3, x18 + msr spsr_el3, x16 + msr elr_el3, x17 + + /* Restore saved general purpose registers and return */ + b restore_gp_registers_eret +endfunc el3_exit diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c new file mode 100644 index 00000000..c8232df9 --- /dev/null +++ b/lib/el3_runtime/aarch64/context_mgmt.c @@ -0,0 +1,568 @@ +/* + * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. 
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <arch_helpers.h>
+#include <assert.h>
+#include <bl_common.h>
+#include <context.h>
+#include <context_mgmt.h>
+#include <interrupt_mgmt.h>
+#include <platform.h>
+#include <platform_def.h>
+#include <pubsub_events.h>
+#include <smcc_helpers.h>
+#include <string.h>
+#include <utils.h>
+
+
+/*******************************************************************************
+ * Context management library initialisation routine. This library is used by
+ * runtime services to share pointers to 'cpu_context' structures for the secure
+ * and non-secure states. Management of the structures and their associated
+ * memory is not done by the context management library e.g. the PSCI service
+ * manages the cpu context used for entry from and exit to the non-secure state.
+ * The Secure payload dispatcher service manages the context(s) corresponding to
+ * the secure state. It also uses this library to get access to the non-secure
+ * state cpu context pointers.
+ * Lastly, this library provides the API to make SP_EL3 point to the cpu context
+ * which will be used for programming an entry into a lower EL. The same context
+ * will be used to save state upon exception entry from that EL.
+ ******************************************************************************/
+void cm_init(void)
+{
+	/*
+	 * The context management library has only global data to initialize, but
+	 * that will be done when the BSS is zeroed out
+	 */
+}
+
+/*******************************************************************************
+ * The following function initializes the cpu_context 'ctx' for
+ * first use, and sets the initial entrypoint state as specified by the
+ * entry_point_info structure.
+ *
+ * The security state to initialize is determined by the SECURE attribute
+ * of the entry_point_info. The function returns a pointer to the initialized
+ * context and sets this as the next context to return to.
+ *
+ * The EE and ST attributes are used to configure the endianness and secure
+ * timer availability for the new execution context.
+ *
+ * To prepare the register state for entry call cm_prepare_el3_exit() and
+ * el3_exit(). For Secure-EL1 cm_prepare_el3_exit() is equivalent to
+ * cm_el1_sysregs_context_restore().
+ ******************************************************************************/
+static void cm_init_context_common(cpu_context_t *ctx, const entry_point_info_t *ep)
+{
+	unsigned int security_state;
+	uint32_t scr_el3, pmcr_el0;
+	el3_state_t *state;
+	gp_regs_t *gp_regs;
+	unsigned long sctlr_elx;
+
+	assert(ctx);
+
+	security_state = GET_SECURITY_STATE(ep->h.attr);
+
+	/* Clear any residual register values from the context */
+	zeromem(ctx, sizeof(*ctx));
+
+	/*
+	 * SCR_EL3 was initialised during reset sequence in macro
+	 * el3_arch_init_common. This code modifies the SCR_EL3 fields that
+	 * affect the next EL.
+	 *
+	 * The following fields are initially set to zero and then updated to
+	 * the required value depending on the state of the SPSR_EL3 and the
+	 * Security state and entrypoint attributes of the next EL.
+	 */
+	scr_el3 = read_scr();
+	scr_el3 &= ~(SCR_NS_BIT | SCR_RW_BIT | SCR_FIQ_BIT | SCR_IRQ_BIT |
+			SCR_ST_BIT | SCR_HCE_BIT);
+	/*
+	 * SCR_NS: Set the security state of the next EL.
+	 */
+	if (security_state != SECURE)
+		scr_el3 |= SCR_NS_BIT;
+	/*
+	 * SCR_EL3.RW: Set the execution state, AArch32 or AArch64, for next
+	 * Exception level as specified by SPSR.
+	 */
+	if (GET_RW(ep->spsr) == MODE_RW_64)
+		scr_el3 |= SCR_RW_BIT;
+	/*
+	 * SCR_EL3.ST: Traps Secure EL1 accesses to the Counter-timer Physical
+	 * Secure timer registers to EL3, from AArch64 state only, if specified
+	 * by the entrypoint attributes.
+	 */
+	if (EP_GET_ST(ep->h.attr))
+		scr_el3 |= SCR_ST_BIT;
+
+#ifndef HANDLE_EA_EL3_FIRST
+	/*
+	 * SCR_EL3.EA: Do not route External Abort and SError exceptions to
+	 * EL3 when executing at a lower EL. When executing at EL3, External
+	 * Aborts are taken to EL3.
+	 */
+	scr_el3 &= ~SCR_EA_BIT;
+#endif
+
+#ifdef IMAGE_BL31
+	/*
+	 * SCR_EL3.IRQ, SCR_EL3.FIQ: Enable the physical FIQ and IRQ routing as
+	 * indicated by the interrupt routing model for BL31.
+	 */
+	scr_el3 |= get_scr_el3_from_routing_model(security_state);
+#endif
+
+	/*
+	 * SCR_EL3.HCE: Enable HVC instructions if next execution state is
+	 * AArch64 and next EL is EL2, or if next execution state is AArch32 and
+	 * next mode is Hyp.
+	 */
+	if ((GET_RW(ep->spsr) == MODE_RW_64
+	     && GET_EL(ep->spsr) == MODE_EL2)
+	    || (GET_RW(ep->spsr) != MODE_RW_64
+		&& GET_M32(ep->spsr) == MODE32_hyp)) {
+		scr_el3 |= SCR_HCE_BIT;
+	}
+
+	/*
+	 * Initialise SCTLR_EL1 to the reset value corresponding to the target
+	 * execution state, setting all fields rather than relying on the hw.
+	 * Some fields have architecturally UNKNOWN reset values and these are
+	 * set to zero.
+	 *
+	 * SCTLR.EE: Endianness is taken from the entrypoint attributes.
+	 *
+	 * SCTLR.M, SCTLR.C and SCTLR.I: These fields must be zero (as
+	 * required by PSCI specification)
+	 */
+	sctlr_elx = EP_GET_EE(ep->h.attr) ? SCTLR_EE_BIT : 0;
+	if (GET_RW(ep->spsr) == MODE_RW_64)
+		sctlr_elx |= SCTLR_EL1_RES1;
+	else {
+		/*
+		 * If the target execution state is AArch32 then the following
+		 * fields need to be set.
+		 *
+		 * SCTLR_EL1.nTWE: Set to one so that EL0 execution of WFE
+		 * instructions are not trapped to EL1.
+		 *
+		 * SCTLR_EL1.nTWI: Set to one so that EL0 execution of WFI
+		 * instructions are not trapped to EL1.
+		 *
+		 * SCTLR_EL1.CP15BEN: Set to one to enable EL0 execution of the
+		 * CP15DMB, CP15DSB, and CP15ISB instructions.
+		 */
+		sctlr_elx |= SCTLR_AARCH32_EL1_RES1 | SCTLR_CP15BEN_BIT
+					| SCTLR_NTWI_BIT | SCTLR_NTWE_BIT;
+	}
+
+	/*
+	 * Store the initialised SCTLR_EL1 value in the cpu_context - SCTLR_EL2
+	 * and other EL2 registers are set up by cm_prepare_el3_exit() as they
+	 * are not part of the stored cpu_context.
+	 */
+	write_ctx_reg(get_sysregs_ctx(ctx), CTX_SCTLR_EL1, sctlr_elx);
+
+	if (security_state == SECURE) {
+		/*
+		 * Initialise PMCR_EL0 for secure context only, setting all
+		 * fields rather than relying on hw. Some fields are
+		 * architecturally UNKNOWN on reset.
+		 *
+		 * PMCR_EL0.LC: Set to one so that cycle counter overflow, that
+		 * is recorded in PMOVSCLR_EL0[31], occurs on the increment
+		 * that changes PMCCNTR_EL0[63] from 1 to 0.
+		 *
+		 * PMCR_EL0.DP: Set to one so that the cycle counter,
+		 * PMCCNTR_EL0 does not count when event counting is prohibited.
+		 *
+		 * PMCR_EL0.X: Set to zero to disable export of events.
+		 *
+		 * PMCR_EL0.D: Set to zero so that, when enabled, PMCCNTR_EL0
+		 * counts on every clock cycle.
+		 */
+		pmcr_el0 = ((PMCR_EL0_RESET_VAL | PMCR_EL0_LC_BIT
+				| PMCR_EL0_DP_BIT)
+				& ~(PMCR_EL0_X_BIT | PMCR_EL0_D_BIT));
+		write_ctx_reg(get_sysregs_ctx(ctx), CTX_PMCR_EL0, pmcr_el0);
+	}
+
+	/* Populate EL3 state so that we have the right context before doing ERET */
+	state = get_el3state_ctx(ctx);
+	write_ctx_reg(state, CTX_SCR_EL3, scr_el3);
+	write_ctx_reg(state, CTX_ELR_EL3, ep->pc);
+	write_ctx_reg(state, CTX_SPSR_EL3, ep->spsr);
+
+	/*
+	 * Store the X0-X7 values from the entrypoint into the context
+	 * Use memcpy as we are in control of the layout of the structures
+	 */
+	gp_regs = get_gpregs_ctx(ctx);
+	memcpy(gp_regs, (void *)&ep->args, sizeof(aapcs64_params_t));
+}
+
+/*******************************************************************************
+ * The following function initializes the cpu_context for a CPU specified by
+ * its `cpu_idx` for first use, and sets the initial entrypoint state as
+ * specified by the entry_point_info structure.
+ ******************************************************************************/
+void cm_init_context_by_index(unsigned int cpu_idx,
+			      const entry_point_info_t *ep)
+{
+	cpu_context_t *ctx;
+	ctx = cm_get_context_by_index(cpu_idx, GET_SECURITY_STATE(ep->h.attr));
+	cm_init_context_common(ctx, ep);
+}
+
+/*******************************************************************************
+ * The following function initializes the cpu_context for the current CPU
+ * for first use, and sets the initial entrypoint state as specified by the
+ * entry_point_info structure.
+ ******************************************************************************/
+void cm_init_my_context(const entry_point_info_t *ep)
+{
+	cpu_context_t *ctx;
+	ctx = cm_get_context(GET_SECURITY_STATE(ep->h.attr));
+	cm_init_context_common(ctx, ep);
+}
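Taken together with cm_prepare_el3_exit() just below, the initialization functions above give the intended first-entry flow. A minimal sketch, assuming a hypothetical BL31 helper and that the entry_point_info for BL33 has already been obtained from the platform:

	/* Illustrative only: first entry into the normal world from BL31. */
	static void bl31_enter_normal_world_sketch(entry_point_info_t *bl33_ep)
	{
		/* Build the initial non-secure context from the entry point. */
		cm_init_my_context(bl33_ep);

		/* Restore EL1 state and set up (or safely disable) EL2; a
		 * subsequent el3_exit() then drops into BL33. */
		cm_prepare_el3_exit(NON_SECURE);
	}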
+
+/*******************************************************************************
+ * Prepare the CPU system registers for first entry into secure or normal world
+ *
+ * If execution is requested to EL2 or hyp mode, SCTLR_EL2 is initialized
+ * If execution is requested to non-secure EL1 or svc mode, and the CPU supports
+ * EL2 then EL2 is disabled by configuring all necessary EL2 registers.
+ * For all entries, the EL1 registers are initialized from the cpu_context
+ ******************************************************************************/
+void cm_prepare_el3_exit(uint32_t security_state)
+{
+	uint32_t sctlr_elx, scr_el3, mdcr_el2;
+	cpu_context_t *ctx = cm_get_context(security_state);
+
+	assert(ctx);
+
+	if (security_state == NON_SECURE) {
+		scr_el3 = read_ctx_reg(get_el3state_ctx(ctx), CTX_SCR_EL3);
+		if (scr_el3 & SCR_HCE_BIT) {
+			/* Use SCTLR_EL1.EE value to initialise sctlr_el2 */
+			sctlr_elx = read_ctx_reg(get_sysregs_ctx(ctx),
+						 CTX_SCTLR_EL1);
+			sctlr_elx &= SCTLR_EE_BIT;
+			sctlr_elx |= SCTLR_EL2_RES1;
+			write_sctlr_el2(sctlr_elx);
+		} else if (EL_IMPLEMENTED(2)) {
+			/*
+			 * EL2 present but unused, need to disable safely.
+			 * SCTLR_EL2 can be ignored in this case.
+			 *
+			 * Initialise all fields in HCR_EL2, except HCR_EL2.RW,
+			 * to zero so that Non-secure operations do not trap to
+			 * EL2.
+			 *
+			 * HCR_EL2.RW: Set this field to match SCR_EL3.RW
+			 */
+			write_hcr_el2((scr_el3 & SCR_RW_BIT) ? HCR_RW_BIT : 0);
+
+			/*
+			 * Initialise CPTR_EL2 setting all fields rather than
+			 * relying on the hw. All fields have architecturally
+			 * UNKNOWN reset values.
+			 *
+			 * CPTR_EL2.TCPAC: Set to zero so that Non-secure EL1
+			 * accesses to the CPACR_EL1 or CPACR from both
+			 * Execution states do not trap to EL2.
+			 *
+			 * CPTR_EL2.TTA: Set to zero so that Non-secure System
+			 * register accesses to the trace registers from both
+			 * Execution states do not trap to EL2.
+			 *
+			 * CPTR_EL2.TFP: Set to zero so that Non-secure accesses
+			 * to SIMD and floating-point functionality from both
+			 * Execution states do not trap to EL2.
+			 */
+			write_cptr_el2(CPTR_EL2_RESET_VAL &
+					~(CPTR_EL2_TCPAC_BIT | CPTR_EL2_TTA_BIT
+					| CPTR_EL2_TFP_BIT));
+
+			/*
+			 * Initialise CNTHCTL_EL2. All fields are
+			 * architecturally UNKNOWN on reset and are set to zero
+			 * except for field(s) listed below.
+			 *
+			 * CNTHCTL_EL2.EL1PCEN: Set to one to disable traps to
+			 * Hyp mode of Non-secure EL0 and EL1 accesses to the
+			 * physical timer registers.
+			 *
+			 * CNTHCTL_EL2.EL1PCTEN: Set to one to disable traps to
+			 * Hyp mode of Non-secure EL0 and EL1 accesses to the
+			 * physical counter registers.
+			 */
+			write_cnthctl_el2(CNTHCTL_RESET_VAL |
+						EL1PCEN_BIT | EL1PCTEN_BIT);
+
+			/*
+			 * Initialise CNTVOFF_EL2 to zero as it resets to an
+			 * architecturally UNKNOWN value.
+			 */
+			write_cntvoff_el2(0);
+
+			/*
+			 * Set VPIDR_EL2 and VMPIDR_EL2 to match MIDR_EL1 and
+			 * MPIDR_EL1 respectively.
+			 */
+			write_vpidr_el2(read_midr_el1());
+			write_vmpidr_el2(read_mpidr_el1());
+
+			/*
+			 * Initialise VTTBR_EL2. All fields are architecturally
+			 * UNKNOWN on reset.
+			 *
+			 * VTTBR_EL2.VMID: Set to zero. Even though EL1&0 stage
+			 * 2 address translation is disabled, cache maintenance
+			 * operations depend on the VMID.
+			 *
+			 * VTTBR_EL2.BADDR: Set to zero as EL1&0 stage 2 address
+			 * translation is disabled.
+			 */
+			write_vttbr_el2(VTTBR_RESET_VAL &
+				~((VTTBR_VMID_MASK << VTTBR_VMID_SHIFT)
+				| (VTTBR_BADDR_MASK << VTTBR_BADDR_SHIFT)));
+
+			/*
+			 * Initialise MDCR_EL2, setting all fields rather than
+			 * relying on hw. Some fields are architecturally
+			 * UNKNOWN on reset.
+			 *
+			 * MDCR_EL2.TPMS (ARM v8.2): Do not trap statistical
+			 * profiling controls to EL2.
+			 *
+			 * MDCR_EL2.E2PB (ARM v8.2): SPE enabled in non-secure
+			 * state. Accesses to profiling buffer controls at
+			 * non-secure EL1 are not trapped to EL2.
+			 *
+			 * MDCR_EL2.TDRA: Set to zero so that Non-secure EL0 and
+			 * EL1 System register accesses to the Debug ROM
+			 * registers are not trapped to EL2.
+			 *
+			 * MDCR_EL2.TDOSA: Set to zero so that Non-secure EL1
+			 * System register accesses to the powerdown debug
+			 * registers are not trapped to EL2.
+			 *
+			 * MDCR_EL2.TDA: Set to zero so that System register
+			 * accesses to the debug registers do not trap to EL2.
+			 *
+			 * MDCR_EL2.TDE: Set to zero so that debug exceptions
+			 * are not routed to EL2.
+			 *
+			 * MDCR_EL2.HPME: Set to zero to disable EL2 Performance
+			 * Monitors.
+			 *
+			 * MDCR_EL2.TPM: Set to zero so that Non-secure EL0 and
+			 * EL1 accesses to all Performance Monitors registers
+			 * are not trapped to EL2.
+			 *
+			 * MDCR_EL2.TPMCR: Set to zero so that Non-secure EL0
+			 * and EL1 accesses to the PMCR_EL0 or PMCR are not
+			 * trapped to EL2.
+			 *
+			 * MDCR_EL2.HPMN: Set to value of PMCR_EL0.N which is the
+			 * architecturally-defined reset value.
+			 */
+			mdcr_el2 = ((MDCR_EL2_RESET_VAL |
+					((read_pmcr_el0() & PMCR_EL0_N_BITS)
+					>> PMCR_EL0_N_SHIFT)) &
+					~(MDCR_EL2_TDRA_BIT | MDCR_EL2_TDOSA_BIT
+					| MDCR_EL2_TDA_BIT | MDCR_EL2_TDE_BIT
+					| MDCR_EL2_HPME_BIT | MDCR_EL2_TPM_BIT
+					| MDCR_EL2_TPMCR_BIT));
+
+#if ENABLE_SPE_FOR_LOWER_ELS
+			uint64_t id_aa64dfr0_el1;
+
+			/* Detect if SPE is implemented */
+			id_aa64dfr0_el1 = read_id_aa64dfr0_el1() >>
+					  ID_AA64DFR0_PMS_SHIFT;
+			if ((id_aa64dfr0_el1 & ID_AA64DFR0_PMS_MASK) == 1) {
+				/*
+				 * Make sure traps to EL2 are not generated if
+				 * EL2 is implemented but not used.
+				 */
+				mdcr_el2 &= ~MDCR_EL2_TPMS;
+				mdcr_el2 |= MDCR_EL2_E2PB(MDCR_EL2_E2PB_EL1);
+			}
+#endif
+
+			write_mdcr_el2(mdcr_el2);
+
+			/*
+			 * Initialise HSTR_EL2. All fields are architecturally
+			 * UNKNOWN on reset.
+			 *
+			 * HSTR_EL2.T<n>: Set all these fields to zero so that
+			 * Non-secure EL0 or EL1 accesses to System registers
+			 * do not trap to EL2.
+			 */
+			write_hstr_el2(HSTR_EL2_RESET_VAL & ~(HSTR_EL2_T_MASK));
+			/*
+			 * Initialise CNTHP_CTL_EL2. All fields are
+			 * architecturally UNKNOWN on reset.
+			 *
+			 * CNTHP_CTL_EL2.ENABLE: Set to zero to disable the EL2
+			 * physical timer and prevent timer interrupts.
+			 */
+			write_cnthp_ctl_el2(CNTHP_CTL_RESET_VAL &
+						~(CNTHP_CTL_ENABLE_BIT));
+		}
+	}
+
+	cm_el1_sysregs_context_restore(security_state);
+	cm_set_next_eret_context(security_state);
+}
+
+/*******************************************************************************
+ * The next four functions are used by runtime services to save and restore
+ * EL1 context on the 'cpu_context' structure for the specified security
+ * state.
+ ******************************************************************************/
+void cm_el1_sysregs_context_save(uint32_t security_state)
+{
+	cpu_context_t *ctx;
+
+	ctx = cm_get_context(security_state);
+	assert(ctx);
+
+	el1_sysregs_context_save(get_sysregs_ctx(ctx));
+	el1_sysregs_context_save_post_ops();
+
+#if IMAGE_BL31
+	if (security_state == SECURE)
+		PUBLISH_EVENT(cm_exited_secure_world);
+	else
+		PUBLISH_EVENT(cm_exited_normal_world);
+#endif
+}
+
+void cm_el1_sysregs_context_restore(uint32_t security_state)
+{
+	cpu_context_t *ctx;
+
+	ctx = cm_get_context(security_state);
+	assert(ctx);
+
+	el1_sysregs_context_restore(get_sysregs_ctx(ctx));
+
+#if IMAGE_BL31
+	if (security_state == SECURE)
+		PUBLISH_EVENT(cm_entering_secure_world);
+	else
+		PUBLISH_EVENT(cm_entering_normal_world);
+#endif
+}
+
+/*******************************************************************************
+ * This function populates ELR_EL3 member of 'cpu_context' pertaining to the
+ * given security state with the given entrypoint
+ ******************************************************************************/
+void cm_set_elr_el3(uint32_t security_state, uintptr_t entrypoint)
+{
+	cpu_context_t *ctx;
+	el3_state_t *state;
+
+	ctx = cm_get_context(security_state);
+	assert(ctx);
+
+	/* Populate EL3 state so that ERET jumps to the correct entry */
+	state = get_el3state_ctx(ctx);
+	write_ctx_reg(state, CTX_ELR_EL3, entrypoint);
+}
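A runtime service typically combines cm_set_elr_el3() (or cm_set_elr_spsr_el3() below) with cm_set_next_eret_context() to redirect the next exception return. A hedged sketch; the function name and entrypoint value are illustrative, the called APIs are from this patch:

	/* Illustrative only: make the next ERET into the normal world land
	 * on a new entrypoint. */
	static void redirect_ns_return_sketch(uintptr_t new_ep)
	{
		cm_set_elr_el3(NON_SECURE, new_ep);
		cm_set_next_eret_context(NON_SECURE);
	}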
+
+/*******************************************************************************
+ * This function populates ELR_EL3 and SPSR_EL3 members of 'cpu_context'
+ * pertaining to the given security state
+ ******************************************************************************/
+void cm_set_elr_spsr_el3(uint32_t security_state,
+			 uintptr_t entrypoint, uint32_t spsr)
+{
+	cpu_context_t *ctx;
+	el3_state_t *state;
+
+	ctx = cm_get_context(security_state);
+	assert(ctx);
+
+	/* Populate EL3 state so that ERET jumps to the correct entry */
+	state = get_el3state_ctx(ctx);
+	write_ctx_reg(state, CTX_ELR_EL3, entrypoint);
+	write_ctx_reg(state, CTX_SPSR_EL3, spsr);
+}
+
+/*******************************************************************************
+ * This function updates a single bit in the SCR_EL3 member of the 'cpu_context'
+ * pertaining to the given security state using the value and bit position
+ * specified in the parameters. It preserves all other bits.
+ ******************************************************************************/
+void cm_write_scr_el3_bit(uint32_t security_state,
+			  uint32_t bit_pos,
+			  uint32_t value)
+{
+	cpu_context_t *ctx;
+	el3_state_t *state;
+	uint32_t scr_el3;
+
+	ctx = cm_get_context(security_state);
+	assert(ctx);
+
+	/* Ensure that the bit position is a valid one */
+	assert((1 << bit_pos) & SCR_VALID_BIT_MASK);
+
+	/* Ensure that the 'value' is only a bit wide */
+	assert(value <= 1);
+
+	/*
+	 * Get the SCR_EL3 value from the cpu context, clear the desired bit
+	 * and set it to its new value.
+	 */
+	state = get_el3state_ctx(ctx);
+	scr_el3 = read_ctx_reg(state, CTX_SCR_EL3);
+	scr_el3 &= ~(1 << bit_pos);
+	scr_el3 |= value << bit_pos;
+	write_ctx_reg(state, CTX_SCR_EL3, scr_el3);
+}
+
+/*******************************************************************************
+ * This function retrieves SCR_EL3 member of 'cpu_context' pertaining to the
+ * given security state.
+ ******************************************************************************/
+uint32_t cm_get_scr_el3(uint32_t security_state)
+{
+	cpu_context_t *ctx;
+	el3_state_t *state;
+
+	ctx = cm_get_context(security_state);
+	assert(ctx);
+
+	/* Get the SCR_EL3 value from the cpu context */
+	state = get_el3state_ctx(ctx);
+	return read_ctx_reg(state, CTX_SCR_EL3);
+}
+
+/*******************************************************************************
+ * This function is used to program the context that's used for exception
+ * return. This initializes the SP_EL3 to a pointer to a 'cpu_context' set for
+ * the required security state
+ ******************************************************************************/
+void cm_set_next_eret_context(uint32_t security_state)
+{
+	cpu_context_t *ctx;
+
+	ctx = cm_get_context(security_state);
+	assert(ctx);
+
+	cm_set_next_context(ctx);
+}
diff --git a/lib/el3_runtime/aarch64/cpu_data.S b/lib/el3_runtime/aarch64/cpu_data.S
new file mode 100644
index 00000000..96be0813
--- /dev/null
+++ b/lib/el3_runtime/aarch64/cpu_data.S
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <asm_macros.S>
+#include <cpu_data.h>
+
+.globl	init_cpu_data_ptr
+.globl	_cpu_data_by_index
+
+/* -----------------------------------------------------------------
+ * void init_cpu_data_ptr(void)
+ *
+ * Initialise the TPIDR_EL3 register to refer to the cpu_data_t
+ * for the calling CPU. This must be called before cm_get_cpu_data()
+ *
+ * This can be called without a valid stack. It assumes that
+ * plat_my_core_pos() does not clobber register x10.
+ * clobbers: x0, x1, x10 + * ----------------------------------------------------------------- + */ +func init_cpu_data_ptr + mov x10, x30 + bl plat_my_core_pos + bl _cpu_data_by_index + msr tpidr_el3, x0 + ret x10 +endfunc init_cpu_data_ptr + +/* ----------------------------------------------------------------- + * cpu_data_t *_cpu_data_by_index(uint32_t cpu_index) + * + * Return the cpu_data structure for the CPU with given linear index + * + * This can be called without a valid stack. + * clobbers: x0, x1 + * ----------------------------------------------------------------- + */ +func _cpu_data_by_index + mov_imm x1, CPU_DATA_SIZE + mul x0, x0, x1 + adr x1, percpu_data + add x0, x0, x1 + ret +endfunc _cpu_data_by_index diff --git a/lib/el3_runtime/cpu_data_array.c b/lib/el3_runtime/cpu_data_array.c new file mode 100644 index 00000000..10b1a7c5 --- /dev/null +++ b/lib/el3_runtime/cpu_data_array.c @@ -0,0 +1,12 @@ +/* + * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <cassert.h> +#include <cpu_data.h> +#include <platform_def.h> + +/* The per_cpu_ptr_cache_t space allocation */ +cpu_data_t percpu_data[PLATFORM_CORE_COUNT]; diff --git a/lib/libfdt/fdt.c b/lib/libfdt/fdt.c new file mode 100644 index 00000000..22286a1a --- /dev/null +++ b/lib/libfdt/fdt.c @@ -0,0 +1,251 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "libfdt_env.h" + +#include <fdt.h> +#include <libfdt.h> + +#include "libfdt_internal.h" + +int fdt_check_header(const void *fdt) +{ + if (fdt_magic(fdt) == FDT_MAGIC) { + /* Complete tree */ + if (fdt_version(fdt) < FDT_FIRST_SUPPORTED_VERSION) + return -FDT_ERR_BADVERSION; + if (fdt_last_comp_version(fdt) > FDT_LAST_SUPPORTED_VERSION) + return -FDT_ERR_BADVERSION; + } else if (fdt_magic(fdt) == FDT_SW_MAGIC) { + /* Unfinished sequential-write blob */ + if (fdt_size_dt_struct(fdt) == 0) + return -FDT_ERR_BADSTATE; + } else { + return -FDT_ERR_BADMAGIC; + } + + return 0; +} + +const void *fdt_offset_ptr(const void *fdt, int offset, unsigned int len) +{ + unsigned absoffset = offset + fdt_off_dt_struct(fdt); + + if ((absoffset < offset) + || ((absoffset + len) < absoffset) + || (absoffset + len) > fdt_totalsize(fdt)) + return NULL; + + if (fdt_version(fdt) >= 0x11) + if (((offset + len) < offset) + || ((offset + len) > fdt_size_dt_struct(fdt))) + return NULL; + + return _fdt_offset_ptr(fdt, offset); +} + +uint32_t fdt_next_tag(const void *fdt, int startoffset, int *nextoffset) +{ + const fdt32_t *tagp, *lenp; + uint32_t tag; + int offset = startoffset; + const char *p; + + *nextoffset = -FDT_ERR_TRUNCATED; + tagp = fdt_offset_ptr(fdt, offset, FDT_TAGSIZE); + if (!tagp) + return FDT_END; /* premature end */ + tag = fdt32_to_cpu(*tagp); + offset += FDT_TAGSIZE; + + *nextoffset = -FDT_ERR_BADSTRUCTURE; + switch (tag) { + case FDT_BEGIN_NODE: + /* skip name */ + do { + p = fdt_offset_ptr(fdt, offset++, 1); + } while (p && (*p != '\0')); + if (!p) + return FDT_END; /* premature end */ + break; + + case FDT_PROP: + lenp = fdt_offset_ptr(fdt, offset, sizeof(*lenp)); + if (!lenp) + return FDT_END; /* premature end */ + /* skip-name offset, length and value */ + offset += sizeof(struct fdt_property) - FDT_TAGSIZE + + fdt32_to_cpu(*lenp); + break; + + case FDT_END: + case FDT_END_NODE: + case FDT_NOP: + break; + + default: + return FDT_END; + } + + if (!fdt_offset_ptr(fdt, startoffset, offset - startoffset)) + return FDT_END; /* premature end */ + + *nextoffset = FDT_TAGALIGN(offset); + return tag; +} + +int _fdt_check_node_offset(const void *fdt, int offset) +{ + if ((offset < 0) || (offset % FDT_TAGSIZE) + || (fdt_next_tag(fdt, offset, &offset) != FDT_BEGIN_NODE)) + return -FDT_ERR_BADOFFSET; + + return offset; +} + +int _fdt_check_prop_offset(const void *fdt, int offset) +{ + if ((offset < 0) || (offset % FDT_TAGSIZE) + || (fdt_next_tag(fdt, offset, &offset) != FDT_PROP)) + return -FDT_ERR_BADOFFSET; + + return offset; +} + +int fdt_next_node(const void *fdt, int offset, int *depth) +{ + int nextoffset = 0; + uint32_t tag; + + if (offset >= 0) + if ((nextoffset = _fdt_check_node_offset(fdt, offset)) < 0) + return nextoffset; + + do { + offset = nextoffset; + tag = fdt_next_tag(fdt, offset, &nextoffset); + + switch (tag) { + case FDT_PROP: + case FDT_NOP: + break; + + case FDT_BEGIN_NODE: + if (depth) + (*depth)++; + break; + + case FDT_END_NODE: + 
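+			/*
+			 * Once the depth counter goes negative, this END_NODE
+			 * closes the node the walk started from, so stop and
+			 * hand back the offset just past it.
+			 */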
if (depth && ((--(*depth)) < 0)) + return nextoffset; + break; + + case FDT_END: + if ((nextoffset >= 0) + || ((nextoffset == -FDT_ERR_TRUNCATED) && !depth)) + return -FDT_ERR_NOTFOUND; + else + return nextoffset; + } + } while (tag != FDT_BEGIN_NODE); + + return offset; +} + +int fdt_first_subnode(const void *fdt, int offset) +{ + int depth = 0; + + offset = fdt_next_node(fdt, offset, &depth); + if (offset < 0 || depth != 1) + return -FDT_ERR_NOTFOUND; + + return offset; +} + +int fdt_next_subnode(const void *fdt, int offset) +{ + int depth = 1; + + /* + * With respect to the parent, the depth of the next subnode will be + * the same as the last. + */ + do { + offset = fdt_next_node(fdt, offset, &depth); + if (offset < 0 || depth < 1) + return -FDT_ERR_NOTFOUND; + } while (depth > 1); + + return offset; +} + +const char *_fdt_find_string(const char *strtab, int tabsize, const char *s) +{ + int len = strlen(s) + 1; + const char *last = strtab + tabsize - len; + const char *p; + + for (p = strtab; p <= last; p++) + if (memcmp(p, s, len) == 0) + return p; + return NULL; +} + +int fdt_move(const void *fdt, void *buf, int bufsize) +{ + FDT_CHECK_HEADER(fdt); + + if (fdt_totalsize(fdt) > bufsize) + return -FDT_ERR_NOSPACE; + + memmove(buf, fdt, fdt_totalsize(fdt)); + return 0; +} diff --git a/lib/libfdt/fdt_addresses.c b/lib/libfdt/fdt_addresses.c new file mode 100644 index 00000000..eff4dbcc --- /dev/null +++ b/lib/libfdt/fdt_addresses.c @@ -0,0 +1,96 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2014 David Gibson <david@gibson.dropbear.id.au> + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "libfdt_env.h" + +#include <fdt.h> +#include <libfdt.h> + +#include "libfdt_internal.h" + +int fdt_address_cells(const void *fdt, int nodeoffset) +{ + const fdt32_t *ac; + int val; + int len; + + ac = fdt_getprop(fdt, nodeoffset, "#address-cells", &len); + if (!ac) + return 2; + + if (len != sizeof(*ac)) + return -FDT_ERR_BADNCELLS; + + val = fdt32_to_cpu(*ac); + if ((val <= 0) || (val > FDT_MAX_NCELLS)) + return -FDT_ERR_BADNCELLS; + + return val; +} + +int fdt_size_cells(const void *fdt, int nodeoffset) +{ + const fdt32_t *sc; + int val; + int len; + + sc = fdt_getprop(fdt, nodeoffset, "#size-cells", &len); + if (!sc) + return 2; + + if (len != sizeof(*sc)) + return -FDT_ERR_BADNCELLS; + + val = fdt32_to_cpu(*sc); + if ((val < 0) || (val > FDT_MAX_NCELLS)) + return -FDT_ERR_BADNCELLS; + + return val; +} diff --git a/lib/libfdt/fdt_empty_tree.c b/lib/libfdt/fdt_empty_tree.c new file mode 100644 index 00000000..f72d13b1 --- /dev/null +++ b/lib/libfdt/fdt_empty_tree.c @@ -0,0 +1,84 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2012 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "libfdt_env.h" + +#include <fdt.h> +#include <libfdt.h> + +#include "libfdt_internal.h" + +int fdt_create_empty_tree(void *buf, int bufsize) +{ + int err; + + err = fdt_create(buf, bufsize); + if (err) + return err; + + err = fdt_finish_reservemap(buf); + if (err) + return err; + + err = fdt_begin_node(buf, ""); + if (err) + return err; + + err = fdt_end_node(buf); + if (err) + return err; + + err = fdt_finish(buf); + if (err) + return err; + + return fdt_open_into(buf, buf, bufsize); +} + diff --git a/lib/libfdt/fdt_ro.c b/lib/libfdt/fdt_ro.c new file mode 100644 index 00000000..04590984 --- /dev/null +++ b/lib/libfdt/fdt_ro.c @@ -0,0 +1,703 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "libfdt_env.h" + +#include <fdt.h> +#include <libfdt.h> + +#include "libfdt_internal.h" + +static int _fdt_nodename_eq(const void *fdt, int offset, + const char *s, int len) +{ + const char *p = fdt_offset_ptr(fdt, offset + FDT_TAGSIZE, len+1); + + if (! p) + /* short match */ + return 0; + + if (memcmp(p, s, len) != 0) + return 0; + + if (p[len] == '\0') + return 1; + else if (!memchr(s, '@', len) && (p[len] == '@')) + return 1; + else + return 0; +} + +const char *fdt_string(const void *fdt, int stroffset) +{ + return (const char *)fdt + fdt_off_dt_strings(fdt) + stroffset; +} + +static int _fdt_string_eq(const void *fdt, int stroffset, + const char *s, int len) +{ + const char *p = fdt_string(fdt, stroffset); + + return (strlen(p) == len) && (memcmp(p, s, len) == 0); +} + +uint32_t fdt_get_max_phandle(const void *fdt) +{ + uint32_t max_phandle = 0; + int offset; + + for (offset = fdt_next_node(fdt, -1, NULL);; + offset = fdt_next_node(fdt, offset, NULL)) { + uint32_t phandle; + + if (offset == -FDT_ERR_NOTFOUND) + return max_phandle; + + if (offset < 0) + return (uint32_t)-1; + + phandle = fdt_get_phandle(fdt, offset); + if (phandle == (uint32_t)-1) + continue; + + if (phandle > max_phandle) + max_phandle = phandle; + } + + return 0; +} + +int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size) +{ + FDT_CHECK_HEADER(fdt); + *address = fdt64_to_cpu(_fdt_mem_rsv(fdt, n)->address); + *size = fdt64_to_cpu(_fdt_mem_rsv(fdt, n)->size); + return 0; +} + +int fdt_num_mem_rsv(const void *fdt) +{ + int i = 0; + + while (fdt64_to_cpu(_fdt_mem_rsv(fdt, i)->size) != 0) + i++; + return i; +} + +static int _nextprop(const void *fdt, int offset) +{ + uint32_t tag; + int nextoffset; + + do { + tag = fdt_next_tag(fdt, offset, &nextoffset); + + switch (tag) { + case FDT_END: + if (nextoffset >= 0) + return -FDT_ERR_BADSTRUCTURE; + else + return nextoffset; + + case FDT_PROP: + return offset; + } + offset = nextoffset; + } while (tag == FDT_NOP); + + return -FDT_ERR_NOTFOUND; +} + +int fdt_subnode_offset_namelen(const void *fdt, int offset, + const char *name, int namelen) +{ + int depth; + + FDT_CHECK_HEADER(fdt); + + for (depth = 0; + (offset >= 0) && (depth >= 0); + offset = fdt_next_node(fdt, offset, &depth)) + if ((depth == 1) + && _fdt_nodename_eq(fdt, offset, name, namelen)) + return offset; + + if (depth < 0) + return -FDT_ERR_NOTFOUND; + return offset; /* error */ +} + +int fdt_subnode_offset(const void *fdt, int parentoffset, + const char *name) +{ + return fdt_subnode_offset_namelen(fdt, parentoffset, name, strlen(name)); +} + +int fdt_path_offset_namelen(const void *fdt, const char *path, int namelen) +{ + const char *end = path + namelen; + const char *p = path; + int offset = 0; + + FDT_CHECK_HEADER(fdt); + + /* see if we have an alias */ + if (*path != '/') { + const char *q = memchr(path, '/', end - p); + + if (!q) + q = end; + + p = fdt_get_alias_namelen(fdt, p, q - p); + if (!p) + return -FDT_ERR_BADPATH; + offset = fdt_path_offset(fdt, p); + + p = q; + } + + while (p < end) { + const char *q; + + while (*p == '/') { + p++; + if (p == end) + return offset; + } + q = memchr(p, '/', end - p); + if (! 
q) + q = end; + + offset = fdt_subnode_offset_namelen(fdt, offset, p, q-p); + if (offset < 0) + return offset; + + p = q; + } + + return offset; +} + +int fdt_path_offset(const void *fdt, const char *path) +{ + return fdt_path_offset_namelen(fdt, path, strlen(path)); +} + +const char *fdt_get_name(const void *fdt, int nodeoffset, int *len) +{ + const struct fdt_node_header *nh = _fdt_offset_ptr(fdt, nodeoffset); + int err; + + if (((err = fdt_check_header(fdt)) != 0) + || ((err = _fdt_check_node_offset(fdt, nodeoffset)) < 0)) + goto fail; + + if (len) + *len = strlen(nh->name); + + return nh->name; + + fail: + if (len) + *len = err; + return NULL; +} + +int fdt_first_property_offset(const void *fdt, int nodeoffset) +{ + int offset; + + if ((offset = _fdt_check_node_offset(fdt, nodeoffset)) < 0) + return offset; + + return _nextprop(fdt, offset); +} + +int fdt_next_property_offset(const void *fdt, int offset) +{ + if ((offset = _fdt_check_prop_offset(fdt, offset)) < 0) + return offset; + + return _nextprop(fdt, offset); +} + +const struct fdt_property *fdt_get_property_by_offset(const void *fdt, + int offset, + int *lenp) +{ + int err; + const struct fdt_property *prop; + + if ((err = _fdt_check_prop_offset(fdt, offset)) < 0) { + if (lenp) + *lenp = err; + return NULL; + } + + prop = _fdt_offset_ptr(fdt, offset); + + if (lenp) + *lenp = fdt32_to_cpu(prop->len); + + return prop; +} + +const struct fdt_property *fdt_get_property_namelen(const void *fdt, + int offset, + const char *name, + int namelen, int *lenp) +{ + for (offset = fdt_first_property_offset(fdt, offset); + (offset >= 0); + (offset = fdt_next_property_offset(fdt, offset))) { + const struct fdt_property *prop; + + if (!(prop = fdt_get_property_by_offset(fdt, offset, lenp))) { + offset = -FDT_ERR_INTERNAL; + break; + } + if (_fdt_string_eq(fdt, fdt32_to_cpu(prop->nameoff), + name, namelen)) + return prop; + } + + if (lenp) + *lenp = offset; + return NULL; +} + +const struct fdt_property *fdt_get_property(const void *fdt, + int nodeoffset, + const char *name, int *lenp) +{ + return fdt_get_property_namelen(fdt, nodeoffset, name, + strlen(name), lenp); +} + +const void *fdt_getprop_namelen(const void *fdt, int nodeoffset, + const char *name, int namelen, int *lenp) +{ + const struct fdt_property *prop; + + prop = fdt_get_property_namelen(fdt, nodeoffset, name, namelen, lenp); + if (! prop) + return NULL; + + return prop->data; +} + +const void *fdt_getprop_by_offset(const void *fdt, int offset, + const char **namep, int *lenp) +{ + const struct fdt_property *prop; + + prop = fdt_get_property_by_offset(fdt, offset, lenp); + if (!prop) + return NULL; + if (namep) + *namep = fdt_string(fdt, fdt32_to_cpu(prop->nameoff)); + return prop->data; +} + +const void *fdt_getprop(const void *fdt, int nodeoffset, + const char *name, int *lenp) +{ + return fdt_getprop_namelen(fdt, nodeoffset, name, strlen(name), lenp); +} + +uint32_t fdt_get_phandle(const void *fdt, int nodeoffset) +{ + const fdt32_t *php; + int len; + + /* FIXME: This is a bit sub-optimal, since we potentially scan + * over all the properties twice. 
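+	 * The new-style "phandle" property is tried first; the legacy
+	 * "linux,phandle" name is only consulted as a fallback.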
*/ + php = fdt_getprop(fdt, nodeoffset, "phandle", &len); + if (!php || (len != sizeof(*php))) { + php = fdt_getprop(fdt, nodeoffset, "linux,phandle", &len); + if (!php || (len != sizeof(*php))) + return 0; + } + + return fdt32_to_cpu(*php); +} + +const char *fdt_get_alias_namelen(const void *fdt, + const char *name, int namelen) +{ + int aliasoffset; + + aliasoffset = fdt_path_offset(fdt, "/aliases"); + if (aliasoffset < 0) + return NULL; + + return fdt_getprop_namelen(fdt, aliasoffset, name, namelen, NULL); +} + +const char *fdt_get_alias(const void *fdt, const char *name) +{ + return fdt_get_alias_namelen(fdt, name, strlen(name)); +} + +int fdt_get_path(const void *fdt, int nodeoffset, char *buf, int buflen) +{ + int pdepth = 0, p = 0; + int offset, depth, namelen; + const char *name; + + FDT_CHECK_HEADER(fdt); + + if (buflen < 2) + return -FDT_ERR_NOSPACE; + + for (offset = 0, depth = 0; + (offset >= 0) && (offset <= nodeoffset); + offset = fdt_next_node(fdt, offset, &depth)) { + while (pdepth > depth) { + do { + p--; + } while (buf[p-1] != '/'); + pdepth--; + } + + if (pdepth >= depth) { + name = fdt_get_name(fdt, offset, &namelen); + if (!name) + return namelen; + if ((p + namelen + 1) <= buflen) { + memcpy(buf + p, name, namelen); + p += namelen; + buf[p++] = '/'; + pdepth++; + } + } + + if (offset == nodeoffset) { + if (pdepth < (depth + 1)) + return -FDT_ERR_NOSPACE; + + if (p > 1) /* special case so that root path is "/", not "" */ + p--; + buf[p] = '\0'; + return 0; + } + } + + if ((offset == -FDT_ERR_NOTFOUND) || (offset >= 0)) + return -FDT_ERR_BADOFFSET; + else if (offset == -FDT_ERR_BADOFFSET) + return -FDT_ERR_BADSTRUCTURE; + + return offset; /* error from fdt_next_node() */ +} + +int fdt_supernode_atdepth_offset(const void *fdt, int nodeoffset, + int supernodedepth, int *nodedepth) +{ + int offset, depth; + int supernodeoffset = -FDT_ERR_INTERNAL; + + FDT_CHECK_HEADER(fdt); + + if (supernodedepth < 0) + return -FDT_ERR_NOTFOUND; + + for (offset = 0, depth = 0; + (offset >= 0) && (offset <= nodeoffset); + offset = fdt_next_node(fdt, offset, &depth)) { + if (depth == supernodedepth) + supernodeoffset = offset; + + if (offset == nodeoffset) { + if (nodedepth) + *nodedepth = depth; + + if (supernodedepth > depth) + return -FDT_ERR_NOTFOUND; + else + return supernodeoffset; + } + } + + if ((offset == -FDT_ERR_NOTFOUND) || (offset >= 0)) + return -FDT_ERR_BADOFFSET; + else if (offset == -FDT_ERR_BADOFFSET) + return -FDT_ERR_BADSTRUCTURE; + + return offset; /* error from fdt_next_node() */ +} + +int fdt_node_depth(const void *fdt, int nodeoffset) +{ + int nodedepth; + int err; + + err = fdt_supernode_atdepth_offset(fdt, nodeoffset, 0, &nodedepth); + if (err) + return (err < 0) ? err : -FDT_ERR_INTERNAL; + return nodedepth; +} + +int fdt_parent_offset(const void *fdt, int nodeoffset) +{ + int nodedepth = fdt_node_depth(fdt, nodeoffset); + + if (nodedepth < 0) + return nodedepth; + return fdt_supernode_atdepth_offset(fdt, nodeoffset, + nodedepth - 1, NULL); +} + +int fdt_node_offset_by_prop_value(const void *fdt, int startoffset, + const char *propname, + const void *propval, int proplen) +{ + int offset; + const void *val; + int len; + + FDT_CHECK_HEADER(fdt); + + /* FIXME: The algorithm here is pretty horrible: we scan each + * property of a node in fdt_getprop(), then if that didn't + * find what we want, we scan over them again making our way + * to the next node. Still it's the easiest to implement + * approach; performance can come later. 
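+	 * As written, a single lookup can cost O(properties x nodes)
+	 * in the worst case.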
*/ + for (offset = fdt_next_node(fdt, startoffset, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + val = fdt_getprop(fdt, offset, propname, &len); + if (val && (len == proplen) + && (memcmp(val, propval, len) == 0)) + return offset; + } + + return offset; /* error from fdt_next_node() */ +} + +int fdt_node_offset_by_phandle(const void *fdt, uint32_t phandle) +{ + int offset; + + if ((phandle == 0) || (phandle == -1)) + return -FDT_ERR_BADPHANDLE; + + FDT_CHECK_HEADER(fdt); + + /* FIXME: The algorithm here is pretty horrible: we + * potentially scan each property of a node in + * fdt_get_phandle(), then if that didn't find what + * we want, we scan over them again making our way to the next + * node. Still it's the easiest to implement approach; + * performance can come later. */ + for (offset = fdt_next_node(fdt, -1, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + if (fdt_get_phandle(fdt, offset) == phandle) + return offset; + } + + return offset; /* error from fdt_next_node() */ +} + +int fdt_stringlist_contains(const char *strlist, int listlen, const char *str) +{ + int len = strlen(str); + const char *p; + + while (listlen >= len) { + if (memcmp(str, strlist, len+1) == 0) + return 1; + p = memchr(strlist, '\0', listlen); + if (!p) + return 0; /* malformed strlist.. */ + listlen -= (p-strlist) + 1; + strlist = p + 1; + } + return 0; +} + +int fdt_stringlist_count(const void *fdt, int nodeoffset, const char *property) +{ + const char *list, *end; + int length, count = 0; + + list = fdt_getprop(fdt, nodeoffset, property, &length); + if (!list) + return -length; + + end = list + length; + + while (list < end) { + length = strnlen(list, end - list) + 1; + + /* Abort if the last string isn't properly NUL-terminated. */ + if (list + length > end) + return -FDT_ERR_BADVALUE; + + list += length; + count++; + } + + return count; +} + +int fdt_stringlist_search(const void *fdt, int nodeoffset, const char *property, + const char *string) +{ + int length, len, idx = 0; + const char *list, *end; + + list = fdt_getprop(fdt, nodeoffset, property, &length); + if (!list) + return -length; + + len = strlen(string) + 1; + end = list + length; + + while (list < end) { + length = strnlen(list, end - list) + 1; + + /* Abort if the last string isn't properly NUL-terminated. */ + if (list + length > end) + return -FDT_ERR_BADVALUE; + + if (length == len && memcmp(list, string, length) == 0) + return idx; + + list += length; + idx++; + } + + return -FDT_ERR_NOTFOUND; +} + +const char *fdt_stringlist_get(const void *fdt, int nodeoffset, + const char *property, int idx, + int *lenp) +{ + const char *list, *end; + int length; + + list = fdt_getprop(fdt, nodeoffset, property, &length); + if (!list) { + if (lenp) + *lenp = length; + + return NULL; + } + + end = list + length; + + while (list < end) { + length = strnlen(list, end - list) + 1; + + /* Abort if the last string isn't properly NUL-terminated. 
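+		 * (strnlen() stopped at the end of the property value
+		 * without finding a NUL, so list + length overshoots end.)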
*/ + if (list + length > end) { + if (lenp) + *lenp = -FDT_ERR_BADVALUE; + + return NULL; + } + + if (idx == 0) { + if (lenp) + *lenp = length - 1; + + return list; + } + + list += length; + idx--; + } + + if (lenp) + *lenp = -FDT_ERR_NOTFOUND; + + return NULL; +} + +int fdt_node_check_compatible(const void *fdt, int nodeoffset, + const char *compatible) +{ + const void *prop; + int len; + + prop = fdt_getprop(fdt, nodeoffset, "compatible", &len); + if (!prop) + return len; + + return !fdt_stringlist_contains(prop, len, compatible); +} + +int fdt_node_offset_by_compatible(const void *fdt, int startoffset, + const char *compatible) +{ + int offset, err; + + FDT_CHECK_HEADER(fdt); + + /* FIXME: The algorithm here is pretty horrible: we scan each + * property of a node in fdt_node_check_compatible(), then if + * that didn't find what we want, we scan over them again + * making our way to the next node. Still it's the easiest to + * implement approach; performance can come later. */ + for (offset = fdt_next_node(fdt, startoffset, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + err = fdt_node_check_compatible(fdt, offset, compatible); + if ((err < 0) && (err != -FDT_ERR_NOTFOUND)) + return err; + else if (err == 0) + return offset; + } + + return offset; /* error from fdt_next_node() */ +} diff --git a/lib/libfdt/fdt_rw.c b/lib/libfdt/fdt_rw.c new file mode 100644 index 00000000..2eed4f58 --- /dev/null +++ b/lib/libfdt/fdt_rw.c @@ -0,0 +1,490 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "libfdt_env.h" + +#include <fdt.h> +#include <libfdt.h> + +#include "libfdt_internal.h" + +static int _fdt_blocks_misordered(const void *fdt, + int mem_rsv_size, int struct_size) +{ + return (fdt_off_mem_rsvmap(fdt) < FDT_ALIGN(sizeof(struct fdt_header), 8)) + || (fdt_off_dt_struct(fdt) < + (fdt_off_mem_rsvmap(fdt) + mem_rsv_size)) + || (fdt_off_dt_strings(fdt) < + (fdt_off_dt_struct(fdt) + struct_size)) + || (fdt_totalsize(fdt) < + (fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt))); +} + +static int _fdt_rw_check_header(void *fdt) +{ + FDT_CHECK_HEADER(fdt); + + if (fdt_version(fdt) < 17) + return -FDT_ERR_BADVERSION; + if (_fdt_blocks_misordered(fdt, sizeof(struct fdt_reserve_entry), + fdt_size_dt_struct(fdt))) + return -FDT_ERR_BADLAYOUT; + if (fdt_version(fdt) > 17) + fdt_set_version(fdt, 17); + + return 0; +} + +#define FDT_RW_CHECK_HEADER(fdt) \ + { \ + int __err; \ + if ((__err = _fdt_rw_check_header(fdt)) != 0) \ + return __err; \ + } + +static inline int _fdt_data_size(void *fdt) +{ + return fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt); +} + +static int _fdt_splice(void *fdt, void *splicepoint, int oldlen, int newlen) +{ + char *p = splicepoint; + char *end = (char *)fdt + _fdt_data_size(fdt); + + if (((p + oldlen) < p) || ((p + oldlen) > end)) + return -FDT_ERR_BADOFFSET; + if ((p < (char *)fdt) || ((end - oldlen + newlen) < (char *)fdt)) + return -FDT_ERR_BADOFFSET; + if ((end - oldlen + newlen) > ((char *)fdt + fdt_totalsize(fdt))) + return -FDT_ERR_NOSPACE; + memmove(p + newlen, p + oldlen, end - p - oldlen); + return 0; +} + +static int _fdt_splice_mem_rsv(void *fdt, struct fdt_reserve_entry *p, + int oldn, int newn) +{ + int delta = (newn - oldn) * sizeof(*p); + int err; + err = _fdt_splice(fdt, p, oldn * sizeof(*p), newn * sizeof(*p)); + if (err) + return err; + fdt_set_off_dt_struct(fdt, fdt_off_dt_struct(fdt) + delta); + fdt_set_off_dt_strings(fdt, fdt_off_dt_strings(fdt) + delta); + return 0; +} + +static int _fdt_splice_struct(void *fdt, void *p, + int oldlen, int newlen) +{ + int delta = newlen - oldlen; + int err; + + if ((err = _fdt_splice(fdt, p, oldlen, newlen))) + return err; + + fdt_set_size_dt_struct(fdt, fdt_size_dt_struct(fdt) + delta); + fdt_set_off_dt_strings(fdt, fdt_off_dt_strings(fdt) + delta); + return 0; +} + +static int _fdt_splice_string(void *fdt, int newlen) +{ + void *p = (char *)fdt + + fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt); + int err; + + if ((err = _fdt_splice(fdt, p, 0, newlen))) + return err; + + fdt_set_size_dt_strings(fdt, fdt_size_dt_strings(fdt) + newlen); + return 0; +} + +static int _fdt_find_add_string(void *fdt, const char *s) +{ + char *strtab = (char *)fdt + fdt_off_dt_strings(fdt); + const char *p; + char *new; + int len = strlen(s) + 1; + int err; + + p = _fdt_find_string(strtab, fdt_size_dt_strings(fdt), s); + if (p) + /* found it */ + return (p - strtab); + + new = strtab + fdt_size_dt_strings(fdt); + err = _fdt_splice_string(fdt, len); + if (err) + return 
err; + + memcpy(new, s, len); + return (new - strtab); +} + +int fdt_add_mem_rsv(void *fdt, uint64_t address, uint64_t size) +{ + struct fdt_reserve_entry *re; + int err; + + FDT_RW_CHECK_HEADER(fdt); + + re = _fdt_mem_rsv_w(fdt, fdt_num_mem_rsv(fdt)); + err = _fdt_splice_mem_rsv(fdt, re, 0, 1); + if (err) + return err; + + re->address = cpu_to_fdt64(address); + re->size = cpu_to_fdt64(size); + return 0; +} + +int fdt_del_mem_rsv(void *fdt, int n) +{ + struct fdt_reserve_entry *re = _fdt_mem_rsv_w(fdt, n); + + FDT_RW_CHECK_HEADER(fdt); + + if (n >= fdt_num_mem_rsv(fdt)) + return -FDT_ERR_NOTFOUND; + + return _fdt_splice_mem_rsv(fdt, re, 1, 0); +} + +static int _fdt_resize_property(void *fdt, int nodeoffset, const char *name, + int len, struct fdt_property **prop) +{ + int oldlen; + int err; + + *prop = fdt_get_property_w(fdt, nodeoffset, name, &oldlen); + if (! (*prop)) + return oldlen; + + if ((err = _fdt_splice_struct(fdt, (*prop)->data, FDT_TAGALIGN(oldlen), + FDT_TAGALIGN(len)))) + return err; + + (*prop)->len = cpu_to_fdt32(len); + return 0; +} + +static int _fdt_add_property(void *fdt, int nodeoffset, const char *name, + int len, struct fdt_property **prop) +{ + int proplen; + int nextoffset; + int namestroff; + int err; + + if ((nextoffset = _fdt_check_node_offset(fdt, nodeoffset)) < 0) + return nextoffset; + + namestroff = _fdt_find_add_string(fdt, name); + if (namestroff < 0) + return namestroff; + + *prop = _fdt_offset_ptr_w(fdt, nextoffset); + proplen = sizeof(**prop) + FDT_TAGALIGN(len); + + err = _fdt_splice_struct(fdt, *prop, 0, proplen); + if (err) + return err; + + (*prop)->tag = cpu_to_fdt32(FDT_PROP); + (*prop)->nameoff = cpu_to_fdt32(namestroff); + (*prop)->len = cpu_to_fdt32(len); + return 0; +} + +int fdt_set_name(void *fdt, int nodeoffset, const char *name) +{ + char *namep; + int oldlen, newlen; + int err; + + FDT_RW_CHECK_HEADER(fdt); + + namep = (char *)(uintptr_t)fdt_get_name(fdt, nodeoffset, &oldlen); + if (!namep) + return oldlen; + + newlen = strlen(name); + + err = _fdt_splice_struct(fdt, namep, FDT_TAGALIGN(oldlen+1), + FDT_TAGALIGN(newlen+1)); + if (err) + return err; + + memcpy(namep, name, newlen+1); + return 0; +} + +int fdt_setprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len) +{ + struct fdt_property *prop; + int err; + + FDT_RW_CHECK_HEADER(fdt); + + err = _fdt_resize_property(fdt, nodeoffset, name, len, &prop); + if (err == -FDT_ERR_NOTFOUND) + err = _fdt_add_property(fdt, nodeoffset, name, len, &prop); + if (err) + return err; + + memcpy(prop->data, val, len); + return 0; +} + +int fdt_appendprop(void *fdt, int nodeoffset, const char *name, + const void *val, int len) +{ + struct fdt_property *prop; + int err, oldlen, newlen; + + FDT_RW_CHECK_HEADER(fdt); + + prop = fdt_get_property_w(fdt, nodeoffset, name, &oldlen); + if (prop) { + newlen = len + oldlen; + err = _fdt_splice_struct(fdt, prop->data, + FDT_TAGALIGN(oldlen), + FDT_TAGALIGN(newlen)); + if (err) + return err; + prop->len = cpu_to_fdt32(newlen); + memcpy(prop->data + oldlen, val, len); + } else { + err = _fdt_add_property(fdt, nodeoffset, name, len, &prop); + if (err) + return err; + memcpy(prop->data, val, len); + } + return 0; +} + +int fdt_delprop(void *fdt, int nodeoffset, const char *name) +{ + struct fdt_property *prop; + int len, proplen; + + FDT_RW_CHECK_HEADER(fdt); + + prop = fdt_get_property_w(fdt, nodeoffset, name, &len); + if (! 
prop) + return len; + + proplen = sizeof(*prop) + FDT_TAGALIGN(len); + return _fdt_splice_struct(fdt, prop, proplen, 0); +} + +int fdt_add_subnode_namelen(void *fdt, int parentoffset, + const char *name, int namelen) +{ + struct fdt_node_header *nh; + int offset, nextoffset; + int nodelen; + int err; + uint32_t tag; + fdt32_t *endtag; + + FDT_RW_CHECK_HEADER(fdt); + + offset = fdt_subnode_offset_namelen(fdt, parentoffset, name, namelen); + if (offset >= 0) + return -FDT_ERR_EXISTS; + else if (offset != -FDT_ERR_NOTFOUND) + return offset; + + /* Try to place the new node after the parent's properties */ + fdt_next_tag(fdt, parentoffset, &nextoffset); /* skip the BEGIN_NODE */ + do { + offset = nextoffset; + tag = fdt_next_tag(fdt, offset, &nextoffset); + } while ((tag == FDT_PROP) || (tag == FDT_NOP)); + + nh = _fdt_offset_ptr_w(fdt, offset); + nodelen = sizeof(*nh) + FDT_TAGALIGN(namelen+1) + FDT_TAGSIZE; + + err = _fdt_splice_struct(fdt, nh, 0, nodelen); + if (err) + return err; + + nh->tag = cpu_to_fdt32(FDT_BEGIN_NODE); + memset(nh->name, 0, FDT_TAGALIGN(namelen+1)); + memcpy(nh->name, name, namelen); + endtag = (fdt32_t *)((char *)nh + nodelen - FDT_TAGSIZE); + *endtag = cpu_to_fdt32(FDT_END_NODE); + + return offset; +} + +int fdt_add_subnode(void *fdt, int parentoffset, const char *name) +{ + return fdt_add_subnode_namelen(fdt, parentoffset, name, strlen(name)); +} + +int fdt_del_node(void *fdt, int nodeoffset) +{ + int endoffset; + + FDT_RW_CHECK_HEADER(fdt); + + endoffset = _fdt_node_end_offset(fdt, nodeoffset); + if (endoffset < 0) + return endoffset; + + return _fdt_splice_struct(fdt, _fdt_offset_ptr_w(fdt, nodeoffset), + endoffset - nodeoffset, 0); +} + +static void _fdt_packblocks(const char *old, char *new, + int mem_rsv_size, int struct_size) +{ + int mem_rsv_off, struct_off, strings_off; + + mem_rsv_off = FDT_ALIGN(sizeof(struct fdt_header), 8); + struct_off = mem_rsv_off + mem_rsv_size; + strings_off = struct_off + struct_size; + + memmove(new + mem_rsv_off, old + fdt_off_mem_rsvmap(old), mem_rsv_size); + fdt_set_off_mem_rsvmap(new, mem_rsv_off); + + memmove(new + struct_off, old + fdt_off_dt_struct(old), struct_size); + fdt_set_off_dt_struct(new, struct_off); + fdt_set_size_dt_struct(new, struct_size); + + memmove(new + strings_off, old + fdt_off_dt_strings(old), + fdt_size_dt_strings(old)); + fdt_set_off_dt_strings(new, strings_off); + fdt_set_size_dt_strings(new, fdt_size_dt_strings(old)); +} + +int fdt_open_into(const void *fdt, void *buf, int bufsize) +{ + int err; + int mem_rsv_size, struct_size; + int newsize; + const char *fdtstart = fdt; + const char *fdtend = fdtstart + fdt_totalsize(fdt); + char *tmp; + + FDT_CHECK_HEADER(fdt); + + mem_rsv_size = (fdt_num_mem_rsv(fdt)+1) + * sizeof(struct fdt_reserve_entry); + + if (fdt_version(fdt) >= 17) { + struct_size = fdt_size_dt_struct(fdt); + } else { + struct_size = 0; + while (fdt_next_tag(fdt, struct_size, &struct_size) != FDT_END) + ; + if (struct_size < 0) + return struct_size; + } + + if (!_fdt_blocks_misordered(fdt, mem_rsv_size, struct_size)) { + /* no further work necessary */ + err = fdt_move(fdt, buf, bufsize); + if (err) + return err; + fdt_set_version(buf, 17); + fdt_set_size_dt_struct(buf, struct_size); + fdt_set_totalsize(buf, bufsize); + return 0; + } + + /* Need to reorder */ + newsize = FDT_ALIGN(sizeof(struct fdt_header), 8) + mem_rsv_size + + struct_size + fdt_size_dt_strings(fdt); + + if (bufsize < newsize) + return -FDT_ERR_NOSPACE; + + /* First attempt to build converted tree at beginning of buffer 
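+	 * (the packed blocks are assembled at tmp and then moved down to
+	 * buf, so tmp only has to avoid the still-live source tree)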
*/ + tmp = buf; + /* But if that overlaps with the old tree... */ + if (((tmp + newsize) > fdtstart) && (tmp < fdtend)) { + /* Try right after the old tree instead */ + tmp = (char *)(uintptr_t)fdtend; + if ((tmp + newsize) > ((char *)buf + bufsize)) + return -FDT_ERR_NOSPACE; + } + + _fdt_packblocks(fdt, tmp, mem_rsv_size, struct_size); + memmove(buf, tmp, newsize); + + fdt_set_magic(buf, FDT_MAGIC); + fdt_set_totalsize(buf, bufsize); + fdt_set_version(buf, 17); + fdt_set_last_comp_version(buf, 16); + fdt_set_boot_cpuid_phys(buf, fdt_boot_cpuid_phys(fdt)); + + return 0; +} + +int fdt_pack(void *fdt) +{ + int mem_rsv_size; + + FDT_RW_CHECK_HEADER(fdt); + + mem_rsv_size = (fdt_num_mem_rsv(fdt)+1) + * sizeof(struct fdt_reserve_entry); + _fdt_packblocks(fdt, fdt, mem_rsv_size, fdt_size_dt_struct(fdt)); + fdt_set_totalsize(fdt, _fdt_data_size(fdt)); + + return 0; +} diff --git a/lib/libfdt/fdt_strerror.c b/lib/libfdt/fdt_strerror.c new file mode 100644 index 00000000..e6c3ceee --- /dev/null +++ b/lib/libfdt/fdt_strerror.c @@ -0,0 +1,96 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "libfdt_env.h" + +#include <fdt.h> +#include <libfdt.h> + +#include "libfdt_internal.h" + +struct fdt_errtabent { + const char *str; +}; + +#define FDT_ERRTABENT(val) \ + [(val)] = { .str = #val, } + +static struct fdt_errtabent fdt_errtable[] = { + FDT_ERRTABENT(FDT_ERR_NOTFOUND), + FDT_ERRTABENT(FDT_ERR_EXISTS), + FDT_ERRTABENT(FDT_ERR_NOSPACE), + + FDT_ERRTABENT(FDT_ERR_BADOFFSET), + FDT_ERRTABENT(FDT_ERR_BADPATH), + FDT_ERRTABENT(FDT_ERR_BADSTATE), + + FDT_ERRTABENT(FDT_ERR_TRUNCATED), + FDT_ERRTABENT(FDT_ERR_BADMAGIC), + FDT_ERRTABENT(FDT_ERR_BADVERSION), + FDT_ERRTABENT(FDT_ERR_BADSTRUCTURE), + FDT_ERRTABENT(FDT_ERR_BADLAYOUT), +}; +#define FDT_ERRTABSIZE (sizeof(fdt_errtable) / sizeof(fdt_errtable[0])) + +const char *fdt_strerror(int errval) +{ + if (errval > 0) + return "<valid offset/length>"; + else if (errval == 0) + return "<no error>"; + else if (errval > -FDT_ERRTABSIZE) { + const char *s = fdt_errtable[-errval].str; + + if (s) + return s; + } + + return "<unknown error>"; +} diff --git a/lib/libfdt/fdt_sw.c b/lib/libfdt/fdt_sw.c new file mode 100644 index 00000000..6a804859 --- /dev/null +++ b/lib/libfdt/fdt_sw.c @@ -0,0 +1,288 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "libfdt_env.h" + +#include <fdt.h> +#include <libfdt.h> + +#include "libfdt_internal.h" + +static int _fdt_sw_check_header(void *fdt) +{ + if (fdt_magic(fdt) != FDT_SW_MAGIC) + return -FDT_ERR_BADMAGIC; + /* FIXME: should check more details about the header state */ + return 0; +} + +#define FDT_SW_CHECK_HEADER(fdt) \ + { \ + int err; \ + if ((err = _fdt_sw_check_header(fdt)) != 0) \ + return err; \ + } + +static void *_fdt_grab_space(void *fdt, size_t len) +{ + int offset = fdt_size_dt_struct(fdt); + int spaceleft; + + spaceleft = fdt_totalsize(fdt) - fdt_off_dt_struct(fdt) + - fdt_size_dt_strings(fdt); + + if ((offset + len < offset) || (offset + len > spaceleft)) + return NULL; + + fdt_set_size_dt_struct(fdt, offset + len); + return _fdt_offset_ptr_w(fdt, offset); +} + +int fdt_create(void *buf, int bufsize) +{ + void *fdt = buf; + + if (bufsize < sizeof(struct fdt_header)) + return -FDT_ERR_NOSPACE; + + memset(buf, 0, bufsize); + + fdt_set_magic(fdt, FDT_SW_MAGIC); + fdt_set_version(fdt, FDT_LAST_SUPPORTED_VERSION); + fdt_set_last_comp_version(fdt, FDT_FIRST_SUPPORTED_VERSION); + fdt_set_totalsize(fdt, bufsize); + + fdt_set_off_mem_rsvmap(fdt, FDT_ALIGN(sizeof(struct fdt_header), + sizeof(struct fdt_reserve_entry))); + fdt_set_off_dt_struct(fdt, fdt_off_mem_rsvmap(fdt)); + fdt_set_off_dt_strings(fdt, bufsize); + + return 0; +} + +int fdt_resize(void *fdt, void *buf, int bufsize) +{ + size_t headsize, tailsize; + char *oldtail, *newtail; + + FDT_SW_CHECK_HEADER(fdt); + + headsize = fdt_off_dt_struct(fdt); + tailsize = fdt_size_dt_strings(fdt); + + if ((headsize + tailsize) > bufsize) + return -FDT_ERR_NOSPACE; + + oldtail = (char *)fdt + fdt_totalsize(fdt) - tailsize; + newtail = (char *)buf + bufsize - tailsize; + + /* Two cases to avoid clobbering data if the old and new + * buffers partially overlap */ + if (buf <= fdt) { + memmove(buf, fdt, headsize); + memmove(newtail, oldtail, tailsize); + } else { + memmove(newtail, oldtail, tailsize); + memmove(buf, fdt, headsize); + } + + fdt_set_off_dt_strings(buf, bufsize); + fdt_set_totalsize(buf, bufsize); + + return 0; +} + +int fdt_add_reservemap_entry(void *fdt, uint64_t addr, uint64_t size) +{ + struct fdt_reserve_entry *re; + int offset; + + FDT_SW_CHECK_HEADER(fdt); + + if (fdt_size_dt_struct(fdt)) + return -FDT_ERR_BADSTATE; + + offset = fdt_off_dt_struct(fdt); + if ((offset + sizeof(*re)) > fdt_totalsize(fdt)) + return -FDT_ERR_NOSPACE; + + re = (struct fdt_reserve_entry *)((char *)fdt + offset); + re->address = cpu_to_fdt64(addr); + re->size = cpu_to_fdt64(size); + + fdt_set_off_dt_struct(fdt, offset + sizeof(*re)); + + return 0; +} + +int fdt_finish_reservemap(void *fdt) +{ + return fdt_add_reservemap_entry(fdt, 0, 0); +} + +int fdt_begin_node(void *fdt, const char *name) +{ + struct fdt_node_header *nh; + int namelen = strlen(name) + 1; + + FDT_SW_CHECK_HEADER(fdt); + + nh = _fdt_grab_space(fdt, sizeof(*nh) + FDT_TAGALIGN(namelen)); + if (! nh) + return -FDT_ERR_NOSPACE; + + nh->tag = cpu_to_fdt32(FDT_BEGIN_NODE); + memcpy(nh->name, name, namelen); + return 0; +} + +int fdt_end_node(void *fdt) +{ + fdt32_t *en; + + FDT_SW_CHECK_HEADER(fdt); + + en = _fdt_grab_space(fdt, FDT_TAGSIZE); + if (! 
en) + return -FDT_ERR_NOSPACE; + + *en = cpu_to_fdt32(FDT_END_NODE); + return 0; +} + +static int _fdt_find_add_string(void *fdt, const char *s) +{ + char *strtab = (char *)fdt + fdt_totalsize(fdt); + const char *p; + int strtabsize = fdt_size_dt_strings(fdt); + int len = strlen(s) + 1; + int struct_top, offset; + + p = _fdt_find_string(strtab - strtabsize, strtabsize, s); + if (p) + return p - strtab; + + /* Add it */ + offset = -strtabsize - len; + struct_top = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt); + if (fdt_totalsize(fdt) + offset < struct_top) + return 0; /* no more room :( */ + + memcpy(strtab + offset, s, len); + fdt_set_size_dt_strings(fdt, strtabsize + len); + return offset; +} + +int fdt_property(void *fdt, const char *name, const void *val, int len) +{ + struct fdt_property *prop; + int nameoff; + + FDT_SW_CHECK_HEADER(fdt); + + nameoff = _fdt_find_add_string(fdt, name); + if (nameoff == 0) + return -FDT_ERR_NOSPACE; + + prop = _fdt_grab_space(fdt, sizeof(*prop) + FDT_TAGALIGN(len)); + if (! prop) + return -FDT_ERR_NOSPACE; + + prop->tag = cpu_to_fdt32(FDT_PROP); + prop->nameoff = cpu_to_fdt32(nameoff); + prop->len = cpu_to_fdt32(len); + memcpy(prop->data, val, len); + return 0; +} + +int fdt_finish(void *fdt) +{ + char *p = (char *)fdt; + fdt32_t *end; + int oldstroffset, newstroffset; + uint32_t tag; + int offset, nextoffset; + + FDT_SW_CHECK_HEADER(fdt); + + /* Add terminator */ + end = _fdt_grab_space(fdt, sizeof(*end)); + if (! end) + return -FDT_ERR_NOSPACE; + *end = cpu_to_fdt32(FDT_END); + + /* Relocate the string table */ + oldstroffset = fdt_totalsize(fdt) - fdt_size_dt_strings(fdt); + newstroffset = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt); + memmove(p + newstroffset, p + oldstroffset, fdt_size_dt_strings(fdt)); + fdt_set_off_dt_strings(fdt, newstroffset); + + /* Walk the structure, correcting string offsets */ + offset = 0; + while ((tag = fdt_next_tag(fdt, offset, &nextoffset)) != FDT_END) { + if (tag == FDT_PROP) { + struct fdt_property *prop = + _fdt_offset_ptr_w(fdt, offset); + int nameoff; + + nameoff = fdt32_to_cpu(prop->nameoff); + nameoff += fdt_size_dt_strings(fdt); + prop->nameoff = cpu_to_fdt32(nameoff); + } + offset = nextoffset; + } + if (nextoffset < 0) + return nextoffset; + + /* Finally, adjust the header */ + fdt_set_totalsize(fdt, newstroffset + fdt_size_dt_strings(fdt)); + fdt_set_magic(fdt, FDT_MAGIC); + return 0; +} diff --git a/lib/libfdt/fdt_wip.c b/lib/libfdt/fdt_wip.c new file mode 100644 index 00000000..6aaab399 --- /dev/null +++ b/lib/libfdt/fdt_wip.c @@ -0,0 +1,139 @@ +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "libfdt_env.h" + +#include <fdt.h> +#include <libfdt.h> + +#include "libfdt_internal.h" + +int fdt_setprop_inplace_namelen_partial(void *fdt, int nodeoffset, + const char *name, int namelen, + uint32_t idx, const void *val, + int len) +{ + void *propval; + int proplen; + + propval = fdt_getprop_namelen_w(fdt, nodeoffset, name, namelen, + &proplen); + if (!propval) + return proplen; + + if (proplen < (len + idx)) + return -FDT_ERR_NOSPACE; + + memcpy((char *)propval + idx, val, len); + return 0; +} + +int fdt_setprop_inplace(void *fdt, int nodeoffset, const char *name, + const void *val, int len) +{ + const void *propval; + int proplen; + + propval = fdt_getprop(fdt, nodeoffset, name, &proplen); + if (! propval) + return proplen; + + if (proplen != len) + return -FDT_ERR_NOSPACE; + + return fdt_setprop_inplace_namelen_partial(fdt, nodeoffset, name, + strlen(name), 0, + val, len); +} + +static void _fdt_nop_region(void *start, int len) +{ + fdt32_t *p; + + for (p = start; (char *)p < ((char *)start + len); p++) + *p = cpu_to_fdt32(FDT_NOP); +} + +int fdt_nop_property(void *fdt, int nodeoffset, const char *name) +{ + struct fdt_property *prop; + int len; + + prop = fdt_get_property_w(fdt, nodeoffset, name, &len); + if (! 
prop) + return len; + + _fdt_nop_region(prop, len + sizeof(*prop)); + + return 0; +} + +int _fdt_node_end_offset(void *fdt, int offset) +{ + int depth = 0; + + while ((offset >= 0) && (depth >= 0)) + offset = fdt_next_node(fdt, offset, &depth); + + return offset; +} + +int fdt_nop_node(void *fdt, int nodeoffset) +{ + int endoffset; + + endoffset = _fdt_node_end_offset(fdt, nodeoffset); + if (endoffset < 0) + return endoffset; + + _fdt_nop_region(fdt_offset_ptr_w(fdt, nodeoffset, 0), + endoffset - nodeoffset); + return 0; +} diff --git a/lib/libfdt/libfdt.mk b/lib/libfdt/libfdt.mk new file mode 100644 index 00000000..d03dde20 --- /dev/null +++ b/lib/libfdt/libfdt.mk @@ -0,0 +1,17 @@ +# +# Copyright (c) 2016, ARM Limited and Contributors. All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +# + +LIBFDT_SRCS := $(addprefix lib/libfdt/, \ + fdt.c \ + fdt_addresses.c \ + fdt_empty_tree.c \ + fdt_ro.c \ + fdt_rw.c \ + fdt_strerror.c \ + fdt_sw.c \ + fdt_wip.c) \ + +INCLUDES += -Iinclude/lib/libfdt diff --git a/lib/libfdt/libfdt_internal.h b/lib/libfdt/libfdt_internal.h new file mode 100644 index 00000000..02cfa6fb --- /dev/null +++ b/lib/libfdt/libfdt_internal.h @@ -0,0 +1,95 @@ +#ifndef _LIBFDT_INTERNAL_H +#define _LIBFDT_INTERNAL_H +/* + * libfdt - Flat Device Tree manipulation + * Copyright (C) 2006 David Gibson, IBM Corporation. + * + * libfdt is dual licensed: you can use it either under the terms of + * the GPL, or the BSD license, at your option. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + * MA 02110-1301 USA + * + * Alternatively, + * + * b) Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
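
The write-in-place routines in fdt_wip.c above never change the blob's size: fdt_setprop_inplace() insists on an identical property length, and fdt_nop_property()/fdt_nop_node() blank out dead structure with FDT_NOP tags rather than compacting it. An illustrative caller, assuming a hypothetical node path and property (both invented for the example):

	#include <libfdt.h>

	/* Overwrite a 4-byte cell property without resizing the blob */
	static int update_clock_freq(void *fdt, uint32_t hz)
	{
		fdt32_t val = cpu_to_fdt32(hz);
		int node = fdt_path_offset(fdt, "/soc/uart@3f8"); /* hypothetical */

		if (node < 0)
			return node;
		/*
		 * Succeeds only if the existing property is also 4 bytes;
		 * otherwise fdt_setprop_inplace() returns -FDT_ERR_NOSPACE.
		 */
		return fdt_setprop_inplace(fdt, node, "clock-frequency",
					   &val, sizeof(val));
	}
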
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <fdt.h> + +#define FDT_ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) +#define FDT_TAGALIGN(x) (FDT_ALIGN((x), FDT_TAGSIZE)) + +#define FDT_CHECK_HEADER(fdt) \ + { \ + int __err; \ + if ((__err = fdt_check_header(fdt)) != 0) \ + return __err; \ + } + +int _fdt_check_node_offset(const void *fdt, int offset); +int _fdt_check_prop_offset(const void *fdt, int offset); +const char *_fdt_find_string(const char *strtab, int tabsize, const char *s); +int _fdt_node_end_offset(void *fdt, int nodeoffset); + +static inline const void *_fdt_offset_ptr(const void *fdt, int offset) +{ + return (const char *)fdt + fdt_off_dt_struct(fdt) + offset; +} + +static inline void *_fdt_offset_ptr_w(void *fdt, int offset) +{ + return (void *)(uintptr_t)_fdt_offset_ptr(fdt, offset); +} + +static inline const struct fdt_reserve_entry *_fdt_mem_rsv(const void *fdt, int n) +{ + const struct fdt_reserve_entry *rsv_table = + (const struct fdt_reserve_entry *) + ((const char *)fdt + fdt_off_mem_rsvmap(fdt)); + + return rsv_table + n; +} +static inline struct fdt_reserve_entry *_fdt_mem_rsv_w(void *fdt, int n) +{ + return (void *)(uintptr_t)_fdt_mem_rsv(fdt, n); +} + +#define FDT_SW_MAGIC (~FDT_MAGIC) + +#endif /* _LIBFDT_INTERNAL_H */ diff --git a/lib/locks/bakery/bakery_lock_coherent.c b/lib/locks/bakery/bakery_lock_coherent.c index 5d538ce2..a857e035 100644 --- a/lib/locks/bakery/bakery_lock_coherent.c +++ b/lib/locks/bakery/bakery_lock_coherent.c @@ -1,31 +1,7 @@ /* - * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ #include <arch_helpers.h> @@ -63,27 +39,15 @@ assert(entry < BAKERY_LOCK_MAX_CPUS); \ } while (0) -/* Convert a ticket to priority */ -#define PRIORITY(t, pos) (((t) << 8) | (pos)) - - -/* Initialize Bakery Lock to reset ownership and all ticket values */ -void bakery_lock_init(bakery_lock_t *bakery) -{ - assert(bakery); - - /* All ticket values need to be 0 */ - memset(bakery, 0, sizeof(*bakery)); - bakery->owner = NO_OWNER; -} - - /* Obtain a ticket for a given CPU */ static unsigned int bakery_get_ticket(bakery_lock_t *bakery, unsigned int me) { unsigned int my_ticket, their_ticket; unsigned int they; + /* Prevent recursive acquisition */ + assert(!bakery_ticket_number(bakery->lock_data[me])); + /* * Flag that we're busy getting our ticket. All CPUs are iterated in the * order of their ordinal position to decide the maximum ticket value @@ -95,9 +59,9 @@ static unsigned int bakery_get_ticket(bakery_lock_t *bakery, unsigned int me) * value, not the ticket value alone. */ my_ticket = 0; - bakery->entering[me] = 1; + bakery->lock_data[me] = make_bakery_data(CHOOSING_TICKET, my_ticket); for (they = 0; they < BAKERY_LOCK_MAX_CPUS; they++) { - their_ticket = bakery->number[they]; + their_ticket = bakery_ticket_number(bakery->lock_data[they]); if (their_ticket > my_ticket) my_ticket = their_ticket; } @@ -107,8 +71,7 @@ static unsigned int bakery_get_ticket(bakery_lock_t *bakery, unsigned int me) * finish calculating our ticket value that we're done */ ++my_ticket; - bakery->number[me] = my_ticket; - bakery->entering[me] = 0; + bakery->lock_data[me] = make_bakery_data(CHOSEN_TICKET, my_ticket); return my_ticket; } @@ -129,14 +92,12 @@ void bakery_lock_get(bakery_lock_t *bakery) { unsigned int they, me; unsigned int my_ticket, my_prio, their_ticket; + unsigned int their_bakery_data; - me = platform_get_core_pos(read_mpidr_el1()); + me = plat_my_core_pos(); assert_bakery_entry_valid(me, bakery); - /* Prevent recursive acquisition */ - assert(bakery->owner != me); - /* Get a ticket */ my_ticket = bakery_get_ticket(bakery, me); @@ -150,14 +111,15 @@ void bakery_lock_get(bakery_lock_t *bakery) continue; /* Wait for the contender to get their ticket */ - while (bakery->entering[they]) - ; + do { + their_bakery_data = bakery->lock_data[they]; + } while (bakery_is_choosing(their_bakery_data)); /* * If the other party is a contender, they'll have non-zero * (valid) ticket value. If they do, compare priorities */ - their_ticket = bakery->number[they]; + their_ticket = bakery_ticket_number(their_bakery_data); if (their_ticket && (PRIORITY(their_ticket, they) < my_prio)) { /* * They have higher priority (lower value). 
Wait for @@ -167,29 +129,27 @@ void bakery_lock_get(bakery_lock_t *bakery) */ do { wfe(); - } while (their_ticket == bakery->number[they]); + } while (their_ticket == + bakery_ticket_number(bakery->lock_data[they])); } } - /* Lock acquired */ - bakery->owner = me; } /* Release the lock and signal contenders */ void bakery_lock_release(bakery_lock_t *bakery) { - unsigned int me = platform_get_core_pos(read_mpidr_el1()); + unsigned int me = plat_my_core_pos(); assert_bakery_entry_valid(me, bakery); - assert(bakery->owner == me); + assert(bakery_ticket_number(bakery->lock_data[me])); /* - * Release lock by resetting ownership and ticket. Then signal other + * Release lock by resetting ticket. Then signal other * waiting contenders */ - bakery->owner = NO_OWNER; - bakery->number[me] = 0; + bakery->lock_data[me] = 0; dsb(); sev(); } diff --git a/lib/locks/bakery/bakery_lock_normal.c b/lib/locks/bakery/bakery_lock_normal.c index a325fd4f..8f59215e 100644 --- a/lib/locks/bakery/bakery_lock_normal.c +++ b/lib/locks/bakery/bakery_lock_normal.c @@ -1,31 +1,7 @@ /* - * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ #include <arch_helpers.h> @@ -56,35 +32,57 @@ * accesses regardless of status of address translation. 
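
In the algorithm above, equal tickets are resolved by PRIORITY(t, pos) = (t << 8) | pos, so of two contenders holding the same ticket number the one with the lower core position wins. None of this machinery is visible to callers; a typical usage sketch (the lock name is illustrative):

	#include <bakery_lock.h>

	DEFINE_BAKERY_LOCK(demo_lock);		/* illustrative lock */

	void demo_critical_section(void)
	{
		bakery_lock_get(&demo_lock);
		/* At most one CPU executes here at a time */
		bakery_lock_release(&demo_lock);
	}
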
 */

-/* Convert a ticket to priority */
-#define PRIORITY(t, pos)	(((t) << 8) | (pos))
-
-#define CHOOSING_TICKET		0x1
-#define CHOOSING_DONE		0x0
-
-#define bakery_is_choosing(info)	(info & 0x1)
-#define bakery_ticket_number(info)	((info >> 1) & 0x7FFF)
-#define make_bakery_data(choosing, number) \
-	(((choosing & 0x1) | (number << 1)) & 0xFFFF)
-
-/* This macro assumes that the bakery_info array is located at the offset specified */
-#define get_my_bakery_info(offset, id)	\
-	(((bakery_info_t *) (((uint8_t *)_cpu_data()) + offset)) + id)
+#ifdef PLAT_PERCPU_BAKERY_LOCK_SIZE
+/*
+ * Verify that the platform defined value for the per-cpu space for bakery
+ * locks is a multiple of the cache line size, to prevent multiple CPUs from
+ * writing to the same bakery lock cache line.
+ *
+ * Using this value, if provided, rather than the linker generated value
+ * results in more efficient code.
+ */
+CASSERT((PLAT_PERCPU_BAKERY_LOCK_SIZE & (CACHE_WRITEBACK_GRANULE - 1)) == 0, \
+	PLAT_PERCPU_BAKERY_LOCK_SIZE_not_cacheline_multiple);
+#define PERCPU_BAKERY_LOCK_SIZE (PLAT_PERCPU_BAKERY_LOCK_SIZE)
+#else
+/*
+ * Use the linker defined symbol which has evaluated the size requirement.
+ * This is not as efficient as using a platform defined constant.
+ */
+extern void *__PERCPU_BAKERY_LOCK_SIZE__;
+#define PERCPU_BAKERY_LOCK_SIZE ((uintptr_t)&__PERCPU_BAKERY_LOCK_SIZE__)
+#endif

-#define get_bakery_info_by_index(offset, id, ix) \
-	(((bakery_info_t *) (((uint8_t *)_cpu_data_by_index(ix)) + offset)) + id)
+#define get_bakery_info(cpu_ix, lock)	\
+	(bakery_info_t *)((uintptr_t)lock + cpu_ix * PERCPU_BAKERY_LOCK_SIZE)

 #define write_cache_op(addr, cached)	\
 	do {	\
-		(cached ? dccvac((uint64_t)addr) :\
-			dcivac((uint64_t)addr));\
+		(cached ? dccvac((uintptr_t)addr) :\
+			dcivac((uintptr_t)addr));\
 			dsbish();\
 	} while (0)

 #define read_cache_op(addr, cached)	if (cached) \
-			dccivac((uint64_t)addr)
+			dccivac((uintptr_t)addr)

-static unsigned int bakery_get_ticket(int id, unsigned int offset,
+/* Helper function to check if the lock is acquired */
+static inline int is_lock_acquired(const bakery_info_t *my_bakery_info,
+				int is_cached)
+{
+	/*
+	 * Even though lock data is updated only by the owning cpu and
+	 * appropriate cache maintenance operations are performed,
+	 * if the previous update was done when the cpu was not participating
+	 * in coherency, then there is a chance that cache maintenance
+	 * operations were not propagated to all the caches in the system.
+	 * Hence do a `read_cache_op()` prior to read.
+	 */
+	read_cache_op(my_bakery_info, is_cached);
+	return !!(bakery_ticket_number(my_bakery_info->lock_data));
+}
+
+static unsigned int bakery_get_ticket(bakery_lock_t *lock, unsigned int me,
 						int is_cached)
 {
 	unsigned int my_ticket, their_ticket;
@@ -95,9 +93,12 @@ static unsigned int bakery_get_ticket(int id, unsigned int offset,
 	 * Obtain a reference to the bakery information for this cpu and ensure
 	 * it is not NULL.
 	 */
-	my_bakery_info = get_my_bakery_info(offset, id);
+	my_bakery_info = get_bakery_info(me, lock);
 	assert(my_bakery_info);

+	/* Prevent recursive acquisition. */
+	assert(!is_lock_acquired(my_bakery_info, is_cached));
+
 	/*
 	 * Tell other contenders that we are through the bakery doorway i.e.
 	 * going to allocate a ticket for this cpu.
@@ -119,7 +120,7 @@ static unsigned int bakery_get_ticket(int id, unsigned int offset,
 	 * Get a reference to the other contender's bakery info and
 	 * ensure that a stale copy is not read.
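
The lock_data word read by is_lock_acquired() packs the 'choosing' flag into bit 0 and the ticket number into bits [15:1]; the encode/decode helpers removed above now live in the bakery lock header. A self-contained sketch of that encoding:

	#include <assert.h>
	#include <stdint.h>

	#define CHOOSING_TICKET		0x1
	#define CHOSEN_TICKET		0x0

	#define bakery_is_choosing(info)	((info) & 0x1)
	#define bakery_ticket_number(info)	(((info) >> 1) & 0x7FFF)
	#define make_bakery_data(choosing, number) \
		((((choosing) & 0x1) | ((number) << 1)) & 0xFFFF)

	static void encoding_demo(void)
	{
		uint16_t data = make_bakery_data(CHOSEN_TICKET, 42);

		assert(!bakery_is_choosing(data));
		assert(bakery_ticket_number(data) == 42);
	}
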
*/ - their_bakery_info = get_bakery_info_by_index(offset, id, they); + their_bakery_info = get_bakery_info(they, lock); assert(their_bakery_info); read_cache_op(their_bakery_info, is_cached); @@ -138,26 +139,29 @@ static unsigned int bakery_get_ticket(int id, unsigned int offset, * finish calculating our ticket value that we're done */ ++my_ticket; - my_bakery_info->lock_data = make_bakery_data(CHOOSING_DONE, my_ticket); + my_bakery_info->lock_data = make_bakery_data(CHOSEN_TICKET, my_ticket); write_cache_op(my_bakery_info, is_cached); return my_ticket; } -void bakery_lock_get(unsigned int id, unsigned int offset) +void bakery_lock_get(bakery_lock_t *lock) { unsigned int they, me, is_cached; unsigned int my_ticket, my_prio, their_ticket; bakery_info_t *their_bakery_info; - uint16_t their_bakery_data; - - me = platform_get_core_pos(read_mpidr_el1()); + unsigned int their_bakery_data; + me = plat_my_core_pos(); +#ifdef AARCH32 + is_cached = read_sctlr() & SCTLR_C_BIT; +#else is_cached = read_sctlr_el3() & SCTLR_C_BIT; +#endif /* Get a ticket */ - my_ticket = bakery_get_ticket(id, offset, me, is_cached); + my_ticket = bakery_get_ticket(lock, me, is_cached); /* * Now that we got our ticket, compute our priority value, then compare @@ -172,17 +176,14 @@ void bakery_lock_get(unsigned int id, unsigned int offset) * Get a reference to the other contender's bakery info and * ensure that a stale copy is not read. */ - their_bakery_info = get_bakery_info_by_index(offset, id, they); + their_bakery_info = get_bakery_info(they, lock); assert(their_bakery_info); - read_cache_op(their_bakery_info, is_cached); - - their_bakery_data = their_bakery_info->lock_data; /* Wait for the contender to get their ticket */ - while (bakery_is_choosing(their_bakery_data)) { + do { read_cache_op(their_bakery_info, is_cached); their_bakery_data = their_bakery_info->lock_data; - } + } while (bakery_is_choosing(their_bakery_data)); /* * If the other party is a contender, they'll have non-zero @@ -203,14 +204,22 @@ void bakery_lock_get(unsigned int id, unsigned int offset) == bakery_ticket_number(their_bakery_info->lock_data)); } } + /* Lock acquired */ } -void bakery_lock_release(unsigned int id, unsigned int offset) +void bakery_lock_release(bakery_lock_t *lock) { bakery_info_t *my_bakery_info; +#ifdef AARCH32 + unsigned int is_cached = read_sctlr() & SCTLR_C_BIT; +#else unsigned int is_cached = read_sctlr_el3() & SCTLR_C_BIT; +#endif + + my_bakery_info = get_bakery_info(plat_my_core_pos(), lock); + + assert(is_lock_acquired(my_bakery_info, is_cached)); - my_bakery_info = get_my_bakery_info(offset, id); my_bakery_info->lock_data = 0; write_cache_op(my_bakery_info, is_cached); sev(); diff --git a/lib/locks/exclusive/aarch32/spinlock.S b/lib/locks/exclusive/aarch32/spinlock.S new file mode 100644 index 00000000..bc77bc9c --- /dev/null +++ b/lib/locks/exclusive/aarch32/spinlock.S @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved. 
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <asm_macros.S>
+
+	.globl	spin_lock
+	.globl	spin_unlock
+
+
+func spin_lock
+	mov	r2, #1
+1:
+	ldrex	r1, [r0]
+	cmp	r1, #0
+	wfene
+	strexeq	r1, r2, [r0]
+	cmpeq	r1, #0
+	bne	1b
+	dmb
+	bx	lr
+endfunc spin_lock
+
+
+func spin_unlock
+	mov	r1, #0
+	stl	r1, [r0]
+	bx	lr
+endfunc spin_unlock
diff --git a/lib/locks/exclusive/aarch64/spinlock.S b/lib/locks/exclusive/aarch64/spinlock.S
new file mode 100644
index 00000000..e2f9eaa4
--- /dev/null
+++ b/lib/locks/exclusive/aarch64/spinlock.S
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <asm_macros.S>
+
+	.globl	spin_lock
+	.globl	spin_unlock
+
+#if ARM_ARCH_AT_LEAST(8, 1)
+
+/*
+ * When compiled for ARMv8.1 or later, choose spin locks based on the Compare
+ * and Swap instruction.
+ */
+# define USE_CAS	1
+
+/*
+ * Lock contenders using CAS, upon failing to acquire the lock, wait with the
+ * monitor in open state. Therefore, a normal store upon unlocking won't
+ * generate an SEV. Use an explicit SEV instruction with the CAS unlock.
+ */
+# define COND_SEV()	sev
+
+#else

+# define USE_CAS	0
+
+/*
+ * Lock contenders using exclusive pairs, upon failing to acquire the lock,
+ * wait with the monitor in exclusive state. A normal store upon unlocking
+ * will implicitly generate an event, so no explicit SEV with unlock is
+ * required.
+ */
+# define COND_SEV()
+
+#endif
+
+#if USE_CAS
+
+	.arch	armv8.1-a
+
+/*
+ * Acquire lock using Compare and Swap instruction.
+ *
+ * Compare for 0 with acquire semantics, and swap 1. Wait until CAS returns
+ * 0.
+ *
+ * void spin_lock(spinlock_t *lock);
+ */
+func spin_lock
+	mov	w2, #1
+	sevl
+1:
+	wfe
+	mov	w1, wzr
+	casa	w1, w2, [x0]
+	cbnz	w1, 1b
+	ret
+endfunc spin_lock
+
+	.arch	armv8-a
+
+#else /* !USE_CAS */
+
+/*
+ * Acquire lock using load-/store-exclusive instruction pair.
+ *
+ * void spin_lock(spinlock_t *lock);
+ */
+func spin_lock
+	mov	w2, #1
+	sevl
+l1:	wfe
+l2:	ldaxr	w1, [x0]
+	cbnz	w1, l1
+	stxr	w1, w2, [x0]
+	cbnz	w1, l2
+	ret
+endfunc spin_lock
+
+#endif /* USE_CAS */
+
+/*
+ * Release lock previously acquired by spin_lock.
+ *
+ * Unconditionally write 0, and conditionally generate an event.
+ *
+ * void spin_unlock(spinlock_t *lock);
+ */
+func spin_unlock
+	stlr	wzr, [x0]
+	COND_SEV()
+	ret
endfunc spin_unlock
diff --git a/lib/locks/exclusive/spinlock.S b/lib/locks/exclusive/spinlock.S
index 5eae2b08..2141f988 100644
--- a/lib/locks/exclusive/spinlock.S
+++ b/lib/locks/exclusive/spinlock.S
@@ -1,50 +1,9 @@
 /*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
 *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * Neither the name of ARM nor the names of its contributors may be used
- * to endorse or promote products derived from this software without specific
- * prior written permission.
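
Both AArch64 variants implement the same acquire/release contract and differ only in how they wait: CAS retried under WFE versus a load-acquire/store-exclusive pair. As a rough, illustrative C11-atomics rendering of that contract (the WFE/SEV event machinery has no portable C equivalent and is omitted):

	#include <stdatomic.h>

	typedef struct {
		atomic_uint lock;
	} c_spinlock_t;

	static void c_spin_lock(c_spinlock_t *l)
	{
		unsigned int expected;

		do {
			expected = 0;
			/* Acquire on success, like CASA/LDAXR above */
		} while (!atomic_compare_exchange_weak_explicit(&l->lock,
				&expected, 1,
				memory_order_acquire, memory_order_relaxed));
	}

	static void c_spin_unlock(c_spinlock_t *l)
	{
		/* Release store, like STLR above */
		atomic_store_explicit(&l->lock, 0, memory_order_release);
	}
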
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ -#include <asm_macros.S> - - .globl spin_lock - .globl spin_unlock - - -func spin_lock - mov w2, #1 - sevl -l1: wfe -l2: ldaxr w1, [x0] - cbnz w1, l1 - stxr w1, w2, [x0] - cbnz w1, l2 - ret - - -func spin_unlock - stlr wzr, [x0] - ret +#if !ERROR_DEPRECATED +#include "./aarch64/spinlock.S" +#endif diff --git a/lib/optee/optee_utils.c b/lib/optee/optee_utils.c new file mode 100644 index 00000000..deb948c2 --- /dev/null +++ b/lib/optee/optee_utils.c @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch_helpers.h> +#include <assert.h> +#include <debug.h> +#include <desc_image_load.h> +#include <errno.h> +#include <optee_utils.h> + +/* + * load_addr_hi and load_addr_lo: image load address. + * image_id: 0 - pager, 1 - paged + * size: image size in bytes. + */ +typedef struct optee_image { + uint32_t load_addr_hi; + uint32_t load_addr_lo; + uint32_t image_id; + uint32_t size; +} optee_image_t; + +#define OPTEE_PAGER_IMAGE_ID 0 +#define OPTEE_PAGED_IMAGE_ID 1 +#define OPTEE_MAX_IMAGE_NUM 2 + +#define TEE_MAGIC_NUM_OPTEE 0x4554504f +/* + * magic: header magic number. + * version: OPTEE header version: + * 1 - not supported + * 2 - supported + * arch: OPTEE os architecture type: 0 - AARCH32, 1 - AARCH64. + * flags: unused currently. + * nb_images: number of images. + */ +typedef struct optee_header { + uint32_t magic; + uint8_t version; + uint8_t arch; + uint16_t flags; + uint32_t nb_images; + optee_image_t optee_image[]; +} optee_header_t; + +/******************************************************************************* + * Check if it is a valid tee header + * Return 1 if valid + * Return 0 if invalid + ******************************************************************************/ +static inline int tee_validate_header(optee_header_t *optee_header) +{ + if ((optee_header->magic == TEE_MAGIC_NUM_OPTEE) && + (optee_header->version == 2) && + (optee_header->nb_images <= OPTEE_MAX_IMAGE_NUM)) { + return 1; + } + + WARN("Not a known TEE, use default loading options.\n"); + return 0; +} + +/******************************************************************************* + * Parse the OPTEE image + * Return 0 on success or a negative error code otherwise. 
+ ******************************************************************************/
+static int parse_optee_image(image_info_t *image_info,
+		optee_image_t *optee_image)
+{
+	uintptr_t init_load_addr, free_end, requested_end;
+	size_t init_size;
+
+	init_load_addr = ((uint64_t)optee_image->load_addr_hi << 32) |
+			optee_image->load_addr_lo;
+	init_size = optee_image->size;
+
+	/*
+	 * A load address of -1 indicates a loader-decided address; use our
+	 * pre-mapped area for the current image, since arm-tf cannot
+	 * allocate memory dynamically.
+	 */
+	if (init_load_addr == -1)
+		init_load_addr = image_info->image_base;
+
+	/* Check that the default end address doesn't overflow */
+	if (check_uptr_overflow(image_info->image_base,
+			image_info->image_max_size - 1))
+		return -1;
+	free_end = image_info->image_base + (image_info->image_max_size - 1);
+
+	/* Check that the image end address doesn't overflow */
+	if (check_uptr_overflow(init_load_addr, init_size - 1))
+		return -1;
+	requested_end = init_load_addr + (init_size - 1);
+	/*
+	 * Check that the requested RAM location is within reserved
+	 * space for OPTEE.
+	 */
+	if (!((init_load_addr >= image_info->image_base) &&
+			(requested_end <= free_end))) {
+		WARN("The load address in optee header %p - %p is not in reserved area: %p - %p.\n",
+				(void *)init_load_addr,
+				(void *)(init_load_addr + init_size),
+				(void *)image_info->image_base,
+				(void *)(image_info->image_base +
+					image_info->image_max_size));
+		return -1;
+	}
+
+	/*
+	 * Remove the skip attr from image_info so the image will be loaded.
+	 * The default attr in image_info is "IMAGE_ATTRIB_SKIP_LOADING",
+	 * which means the image will not be loaded. Here, we have parsed the
+	 * header image and know that this extra image needs to be loaded, so
+	 * remove the skip attr.
+	 */
+	image_info->h.attr &= ~IMAGE_ATTRIB_SKIP_LOADING;
+
+	/* Update image base and size of image_info */
+	image_info->image_base = init_load_addr;
+	image_info->image_size = init_size;
+
+	return 0;
+}
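
The two check_uptr_overflow() calls in parse_optee_image() are the standard idiom for proving that a candidate [addr, addr + size) window fits inside a reserved [base, base + max_size) region without either end-address computation wrapping. The same logic, distilled into a standalone sketch (uptr_overflows() below is a stand-in for TF's helper, written out for illustration):

	#include <stdint.h>

	/* Returns 1 if ptr + inc would wrap around the address space */
	static int uptr_overflows(uintptr_t ptr, uintptr_t inc)
	{
		return inc > UINTPTR_MAX - ptr;
	}

	/* Validate that [addr, addr + size) lies inside [base, base + max_size) */
	static int range_is_valid(uintptr_t addr, uintptr_t size,
				  uintptr_t base, uintptr_t max_size)
	{
		if (size == 0 || max_size == 0 ||
		    uptr_overflows(addr, size - 1) ||
		    uptr_overflows(base, max_size - 1))
			return 0;

		return (addr >= base) &&
		       ((addr + (size - 1)) <= (base + (max_size - 1)));
	}
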
+
+/*******************************************************************************
+ * Parse the OPTEE header
+ * Return 0 on success or a negative error code otherwise.
+ ******************************************************************************/
+int parse_optee_header(entry_point_info_t *header_ep,
+		image_info_t *pager_image_info,
+		image_info_t *paged_image_info)
+{
+	optee_header_t *optee_header;
+	int num, ret;
+
+	assert(header_ep);
+	optee_header = (optee_header_t *)header_ep->pc;
+	assert(optee_header);
+
+	/*
+	 * An OPTEE image comes in one of 3 forms:
+	 *
+	 * 1. Plain OPTEE bin without header.
+	 *	Original bin without header; return directly.
+	 *	BL32_EXTRA1_IMAGE_ID and BL32_EXTRA2_IMAGE_ID will be skipped.
+	 *
+	 * 2. OPTEE bin with header bin, but no paging.
+	 *	Header available and nb_images = 1; remove the skip attr for
+	 *	BL32_EXTRA1_IMAGE_ID. BL32_EXTRA1_IMAGE_ID will be loaded,
+	 *	and BL32_EXTRA2_IMAGE_ID will be skipped.
+	 *
+	 * 3. OPTEE image with paging support.
+	 *	Header available and nb_images = 2; there are 3 bins: header,
+	 *	pager and pageable. Remove the skip attr for
+	 *	BL32_EXTRA1_IMAGE_ID and BL32_EXTRA2_IMAGE_ID to load the
+	 *	pager and paged bins.
+	 */
+	if (!tee_validate_header(optee_header)) {
+		INFO("Invalid OPTEE header, legacy mode.\n");
+		/* Set legacy OPTEE runtime arch - aarch64 */
+		header_ep->args.arg0 = MODE_RW_64;
+		return 0;
+	}
+
+	/* Print the OPTEE header information */
+	INFO("OPTEE ep=0x%x\n", (unsigned int)header_ep->pc);
+	INFO("OPTEE header info:\n");
+	INFO("      magic=0x%x\n", optee_header->magic);
+	INFO("      version=0x%x\n", optee_header->version);
+	INFO("      arch=0x%x\n", optee_header->arch);
+	INFO("      flags=0x%x\n", optee_header->flags);
+	INFO("      nb_images=0x%x\n", optee_header->nb_images);
+
+	/* Parse the OPTEE images */
+	for (num = 0; num < optee_header->nb_images; num++) {
+		if (optee_header->optee_image[num].image_id ==
+				OPTEE_PAGER_IMAGE_ID) {
+			ret = parse_optee_image(pager_image_info,
+				&optee_header->optee_image[num]);
+		} else if (optee_header->optee_image[num].image_id ==
+				OPTEE_PAGED_IMAGE_ID) {
+			ret = parse_optee_image(paged_image_info,
+				&optee_header->optee_image[num]);
+		} else {
+			ERROR("Parse optee image failed.\n");
+			return -1;
+		}
+
+		if (ret != 0)
+			return -1;
+	}
+
+	/*
+	 * Update the "pc" value, which should come from the pager image.
+	 * Once the header image has been parsed it is no longer needed;
+	 * the actual image executed after BL31 is the pager image.
+	 */
+	header_ep->pc =	pager_image_info->image_base;
+
+	/*
+	 * The paged load address and size are populated in the header
+	 * image arguments so that they can be read by the BL32 SPD.
+	 */
+	header_ep->args.arg1 = paged_image_info->image_base;
+	header_ep->args.arg2 = paged_image_info->image_size;
+
+	/* Set OPTEE runtime arch - aarch32/aarch64 */
+	if (optee_header->arch == 0)
+		header_ep->args.arg0 = MODE_RW_32;
+	else
+		header_ep->args.arg0 = MODE_RW_64;
+
+	return 0;
+}
diff --git a/lib/pmf/pmf_main.c b/lib/pmf/pmf_main.c
new file mode 100644
index 00000000..2cf260ec
--- /dev/null
+++ b/lib/pmf/pmf_main.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+#include <arch.h>
+#include <arch_helpers.h>
+#include <assert.h>
+#include <debug.h>
+#include <errno.h>
+#include <platform.h>
+#include <pmf.h>
+#include <string.h>
+
+/*******************************************************************************
+ * The 'pmf_svc_descs' array holds the PMF service descriptors exported by
+ * services by placing them in the 'pmf_svc_descs' linker section.
+ * The 'pmf_svc_descs_indices' array holds the index of a descriptor in the
+ * 'pmf_svc_descs' array. The TIF[15:10] bits in the time-stamp id are used
+ * to get an index into the 'pmf_svc_descs_indices' array. This gives the
+ * index of the descriptor in the 'pmf_svc_descs' array which contains the
+ * service function pointers.
+ ******************************************************************************/
+extern uintptr_t __PMF_SVC_DESCS_START__;
+extern uintptr_t __PMF_SVC_DESCS_END__;
+#define PMF_SVC_DESCS_START	((uintptr_t)(&__PMF_SVC_DESCS_START__))
+#define PMF_SVC_DESCS_END	((uintptr_t)(&__PMF_SVC_DESCS_END__))
+extern void *__PERCPU_TIMESTAMP_SIZE__;
+#define PMF_PERCPU_TIMESTAMP_SIZE	((uintptr_t)&__PERCPU_TIMESTAMP_SIZE__)
+extern uintptr_t __PMF_TIMESTAMP_START__;
+#define PMF_TIMESTAMP_ARRAY_START	((uintptr_t)&__PMF_TIMESTAMP_START__)
+extern uintptr_t __PMF_TIMESTAMP_END__;
+#define PMF_TIMESTAMP_ARRAY_END	((uintptr_t)&__PMF_TIMESTAMP_END__)
+
+#define PMF_SVC_DESCS_MAX	10
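
The descriptor array walked by this file is assembled by the linker rather than by hand: each service drops a pmf_svc_desc_t into the 'pmf_svc_descs' section (through the registration macros in pmf.h), and the start/end symbols above simply bracket whatever accumulated there. A generic sketch of that registration pattern, with all names invented for illustration:

	#include <stdint.h>

	typedef struct {
		const char *name;
		unsigned int id;
	} svc_desc_t;

	/* Provided by the linker script around the section contents */
	extern svc_desc_t __SVC_DESCS_START__[];
	extern svc_desc_t __SVC_DESCS_END__[];

	/* Each service instantiates one descriptor in the dedicated section */
	#define REGISTER_SVC(_name, _id)			\
		static const svc_desc_t _name##_desc		\
		__attribute__((section("svc_descs"), used)) = {	\
			.name = #_name,				\
			.id = (_id),				\
		}

	REGISTER_SVC(demo, 5);	/* illustrative registration */

	static unsigned int count_services(void)
	{
		return __SVC_DESCS_END__ - __SVC_DESCS_START__;
	}
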
+/*
+ * This is used to traverse through registered PMF services.
+ */
+static pmf_svc_desc_t *pmf_svc_descs;
+
+/*
+ * This array is used to store registered PMF services in sorted order.
+ */
+static int pmf_svc_descs_indices[PMF_SVC_DESCS_MAX];
+
+/*
+ * This is used to track the total number of successfully registered PMF
+ * services.
+ */
+static int pmf_num_services;
+
+/*
+ * This is the main PMF function that initializes the registered PMF
+ * services and sorts them in ascending order of service ID.
+ */
+int pmf_setup(void)
+{
+	int rc, ii, jj = 0;
+	int pmf_svc_descs_num, temp_val;
+
+	/* If no PMF services are registered then simply bail out */
+	pmf_svc_descs_num = (PMF_SVC_DESCS_END - PMF_SVC_DESCS_START)/
+				 sizeof(pmf_svc_desc_t);
+	if (pmf_svc_descs_num == 0)
+		return 0;
+
+	assert(pmf_svc_descs_num < PMF_SVC_DESCS_MAX);
+
+	pmf_svc_descs = (pmf_svc_desc_t *) PMF_SVC_DESCS_START;
+	for (ii = 0; ii < pmf_svc_descs_num; ii++) {
+
+		assert(pmf_svc_descs[ii].get_ts);
+
+		/*
+		 * Call the initialization routine for this
+		 * PMF service, if it is defined.
+		 */
+		if (pmf_svc_descs[ii].init) {
+			rc = pmf_svc_descs[ii].init();
+			if (rc) {
+				WARN("Could not initialize PMF "
+					"service %s - skipping\n",
+					pmf_svc_descs[ii].name);
+				continue;
+			}
+		}
+
+		/* Update the pmf_svc_descs_indices array */
+		pmf_svc_descs_indices[jj++] = ii;
+	}
+
+	pmf_num_services = jj;
+
+	/*
+	 * Sort the successfully registered PMF services
+	 * according to service ID
+	 */
+	for (ii = 1; ii < pmf_num_services; ii++) {
+		for (jj = 0; jj < (pmf_num_services - ii); jj++) {
+			if ((pmf_svc_descs[jj].svc_config & PMF_SVC_ID_MASK) >
+				(pmf_svc_descs[jj + 1].svc_config &
+						PMF_SVC_ID_MASK)) {
+				temp_val = pmf_svc_descs_indices[jj];
+				pmf_svc_descs_indices[jj] =
+						pmf_svc_descs_indices[jj+1];
+				pmf_svc_descs_indices[jj+1] = temp_val;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * This function implements a binary search to find a registered
+ * PMF service, based on the Service ID provided in the `tid` argument.
+ */
+static pmf_svc_desc_t *get_service(unsigned int tid)
+{
+	int low = 0;
+	int mid;
+	int high = pmf_num_services;
+	unsigned int svc_id = tid & PMF_SVC_ID_MASK;
+	int index;
+	unsigned int desc_svc_id;
+
+	if (pmf_num_services == 0)
+		return NULL;
+
+	assert(pmf_svc_descs);
+
+	do {
+		mid = (low + high) / 2;
+		index = pmf_svc_descs_indices[mid];
+
+		desc_svc_id = pmf_svc_descs[index].svc_config & PMF_SVC_ID_MASK;
+		if (svc_id < desc_svc_id)
+			high = mid - 1;
+		if (svc_id > desc_svc_id)
+			low = mid + 1;
+	} while ((svc_id != desc_svc_id) && (low <= high));
+
+	/*
+	 * Make sure the service found supports the tid range.
+	 */
+	if ((svc_id == desc_svc_id) && ((tid & PMF_TID_MASK) <
+		(pmf_svc_descs[index].svc_config & PMF_TID_MASK)))
+		return (pmf_svc_desc_t *)&pmf_svc_descs[index];
+
+	return NULL;
+}
+
+/*
+ * This function gets the time-stamp value for the PMF services
+ * registered for the SMC interface, based on `tid` and `mpidr`.
+ */
+int pmf_get_timestamp_smc(unsigned int tid,
+		u_register_t mpidr,
+		unsigned int flags,
+		unsigned long long *ts_value)
+{
+	pmf_svc_desc_t *svc_desc;
+	assert(ts_value);
+
+	/* Search for registered service. */
+	svc_desc = get_service(tid);
+
+	if ((svc_desc == NULL) || (plat_core_pos_by_mpidr(mpidr) < 0)) {
+		*ts_value = 0;
+		return -EINVAL;
+	} else {
+		/* Call the service time-stamp handler. */
+		*ts_value = svc_desc->get_ts(tid, mpidr, flags);
+		return 0;
+	}
+}
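
Note that get_service() binary-searches pmf_svc_descs_indices rather than the descriptors themselves, so only the small index array needs reordering at boot while the descriptor section is left untouched. The same indexed search, reduced to a standalone sketch with conventional low/high bounds and simplified types:

	/* Find `id` among ids[] via a sorted index table; illustrative only */
	static int find_by_index(const unsigned int *ids, const int *indices,
				 int count, unsigned int id)
	{
		int low = 0, high = count - 1;

		while (low <= high) {
			int mid = (low + high) / 2;
			unsigned int candidate = ids[indices[mid]];

			if (candidate == id)
				return indices[mid];	/* index into ids[] */
			if (id < candidate)
				high = mid - 1;
			else
				low = mid + 1;
		}

		return -1;	/* not registered */
	}
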
+/*
+ * This function can be used to dump the `ts` value for a given `tid`.
+ * Assumption is that the console is already initialized.
+ */
+void __pmf_dump_timestamp(unsigned int tid, unsigned long long ts)
+{
+	tf_printf("PMF:cpu %u	tid %u	ts %llu\n",
+		plat_my_core_pos(), tid, ts);
+}
+
+/*
+ * This function calculates the address identified by
+ * `base_addr`, `tid` and `cpuid`.
+ */
+static inline uintptr_t calc_ts_addr(uintptr_t base_addr,
+		unsigned int tid,
+		unsigned int cpuid)
+{
+	assert(cpuid < PLATFORM_CORE_COUNT);
+	assert(base_addr >= PMF_TIMESTAMP_ARRAY_START);
+	assert(base_addr < ((PMF_TIMESTAMP_ARRAY_START +
+		PMF_PERCPU_TIMESTAMP_SIZE) - ((tid & PMF_TID_MASK) *
+		sizeof(unsigned long long))));
+
+	base_addr += ((cpuid * PMF_PERCPU_TIMESTAMP_SIZE) +
+		((tid & PMF_TID_MASK) * sizeof(unsigned long long)));
+
+	return base_addr;
+}
+
+/*
+ * This function stores the `ts` value to the storage identified by
+ * `base_addr`, `tid` and the current cpu id.
+ * Note: The timestamp addresses are cache line aligned per cpu
+ * and only the owning CPU would ever write into it.
+ */
+void __pmf_store_timestamp(uintptr_t base_addr,
+		unsigned int tid,
+		unsigned long long ts)
+{
+	unsigned long long *ts_addr = (unsigned long long *)calc_ts_addr(base_addr,
+				tid, plat_my_core_pos());
+	*ts_addr = ts;
+}
+
+/*
+ * This is the cached version of `pmf_store_my_timestamp`; it additionally
+ * flushes the timestamp's cache line.
+ * Note: The timestamp addresses are cache line aligned per cpu
+ * and only the owning CPU would ever write into it.
+ */
+void __pmf_store_timestamp_with_cache_maint(uintptr_t base_addr,
+		unsigned int tid,
+		unsigned long long ts)
+{
+	unsigned long long *ts_addr = (unsigned long long *)calc_ts_addr(base_addr,
+				tid, plat_my_core_pos());
+	*ts_addr = ts;
+	flush_dcache_range((uintptr_t)ts_addr, sizeof(unsigned long long));
+}
+
+/*
+ * This function retrieves the `ts` value from the storage identified by
+ * `base_addr`, `tid` and `cpuid`.
+ * Note: The timestamp addresses are cache line aligned per cpu.
+ */
+unsigned long long __pmf_get_timestamp(uintptr_t base_addr,
+		unsigned int tid,
+		unsigned int cpuid,
+		unsigned int flags)
+{
+	assert(cpuid < PLATFORM_CORE_COUNT);
+	unsigned long long *ts_addr = (unsigned long long *)calc_ts_addr(base_addr,
+				tid, cpuid);
+
+	if (flags & PMF_CACHE_MAINT)
+		inv_dcache_range((uintptr_t)ts_addr, sizeof(unsigned long long));
+
+	return *ts_addr;
+}
diff --git a/lib/pmf/pmf_smc.c b/lib/pmf/pmf_smc.c
new file mode 100644
index 00000000..248c1fac
--- /dev/null
+++ b/lib/pmf/pmf_smc.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+#include <assert.h>
+#include <debug.h>
+#include <platform.h>
+#include <pmf.h>
+#include <smcc_helpers.h>
+
+/*
+ * This function is responsible for handling all PMF SMC calls.
+ */
+uintptr_t pmf_smc_handler(unsigned int smc_fid,
+		u_register_t x1,
+		u_register_t x2,
+		u_register_t x3,
+		u_register_t x4,
+		void *cookie,
+		void *handle,
+		u_register_t flags)
+{
+	int rc;
+	unsigned long long ts_value;
+
+	if (((smc_fid >> FUNCID_CC_SHIFT) & FUNCID_CC_MASK) == SMC_32) {
+
+		x1 = (uint32_t)x1;
+		x2 = (uint32_t)x2;
+		x3 = (uint32_t)x3;
+
+		switch (smc_fid) {
+		case PMF_SMC_GET_TIMESTAMP_32:
+			/*
+			 * Return the error code and the captured
+			 * time-stamp to the caller.
+			 * x0 --> error code.
+			 * x1 - x2 --> time-stamp value.
+			 */
+			rc = pmf_get_timestamp_smc(x1, x2, x3, &ts_value);
+			SMC_RET3(handle, rc, (uint32_t)ts_value,
+					(uint32_t)(ts_value >> 32));
+
+		default:
+			break;
+		}
+	} else {
+		switch (smc_fid) {
+		case PMF_SMC_GET_TIMESTAMP_64:
+			/*
+			 * Return the error code and the captured
+			 * time-stamp to the caller.
+			 * x0 --> error code.
+			 * x1 --> time-stamp value.
+			 */
+			rc = pmf_get_timestamp_smc(x1, x2, x3, &ts_value);
+			SMC_RET2(handle, rc, ts_value);
+
+		default:
+			break;
+		}
+	}
+
+	WARN("Unimplemented PMF Call: 0x%x\n", smc_fid);
+	SMC_RET1(handle, SMC_UNK);
+}
diff --git a/lib/psci/aarch32/psci_helpers.S b/lib/psci/aarch32/psci_helpers.S
new file mode 100644
index 00000000..9373d4f1
--- /dev/null
+++ b/lib/psci/aarch32/psci_helpers.S
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <asm_macros.S>
+#include <platform_def.h>
+#include <psci.h>
+
+	.globl	psci_do_pwrdown_cache_maintenance
+	.globl	psci_do_pwrup_cache_maintenance
+	.globl	psci_power_down_wfi
+
+/* -----------------------------------------------------------------------
+ * void psci_do_pwrdown_cache_maintenance(unsigned int power_level);
+ *
+ * This function performs cache maintenance for the specified power
+ * level. The levels of cache affected are determined by the power
+ * level which is passed as the argument, i.e. level 0 results
+ * in a flush of the L1 cache. Both the L1 and L2 caches are flushed
+ * for a higher power level.
+ *
+ * Additionally, this function also ensures that stack memory is correctly
+ * flushed out to avoid coherency issues due to a change in its memory
+ * attributes after the data cache is disabled.
+ * -----------------------------------------------------------------------
+ */
+func psci_do_pwrdown_cache_maintenance
+	push	{r4, lr}
+
+	/* ----------------------------------------------
+	 * Turn OFF the cache and do stack maintenance
+	 * prior to the CPU operations. This sequence is
+	 * different from AArch64 because in AArch32 the
+	 * assembler routines for CPU operations utilize
+	 * the stack, whereas in AArch64 they don't.
+	 * ----------------------------------------------
+	 */
+	mov	r4, r0
+	bl	do_stack_maintenance
+
+	/* ---------------------------------------------
+	 * Invoke CPU-specific power down operations for
+	 * the appropriate level
+	 * ---------------------------------------------
+	 */
+	mov	r0, r4
+	pop	{r4, lr}
+	b	prepare_cpu_pwr_dwn
+endfunc psci_do_pwrdown_cache_maintenance
+
+
+/* -----------------------------------------------------------------------
+ * void psci_do_pwrup_cache_maintenance(void);
+ *
+ * This function performs cache maintenance after this cpu is powered up.
+ * Currently, this involves managing the used stack memory before turning
+ * on the data cache.
+ * -----------------------------------------------------------------------
+ */
+func psci_do_pwrup_cache_maintenance
+	/* r12 is pushed to meet the 8 byte stack alignment requirement */
+	push	{r12, lr}
+
+	/* ---------------------------------------------
+	 * Ensure any inflight stack writes have made it
+	 * to main memory.
+	 * ---------------------------------------------
+	 */
+	dmb	st
+
+	/* ---------------------------------------------
+	 * Calculate and store the size of the used
+	 * stack memory in r1. Calculate and store the
+	 * stack base address in r0.
+ * --------------------------------------------- + */ + bl plat_get_my_stack + mov r1, sp + sub r1, r0, r1 + mov r0, sp + bl inv_dcache_range + + /* --------------------------------------------- + * Enable the data cache. + * --------------------------------------------- + */ + ldcopr r0, SCTLR + orr r0, r0, #SCTLR_C_BIT + stcopr r0, SCTLR + isb + + pop {r12, pc} +endfunc psci_do_pwrup_cache_maintenance + + /* --------------------------------------------- + * void do_stack_maintenance(void) + * Do stack maintenance by flushing the used + * stack to the main memory and invalidating the + * remainder. + * --------------------------------------------- + */ +func do_stack_maintenance + push {r4, lr} + bl plat_get_my_stack + + /* Turn off the D-cache */ + ldcopr r1, SCTLR + bic r1, #SCTLR_C_BIT + stcopr r1, SCTLR + isb + + /* --------------------------------------------- + * Calculate and store the size of the used + * stack memory in r1. + * --------------------------------------------- + */ + mov r4, r0 + mov r1, sp + sub r1, r0, r1 + mov r0, sp + bl flush_dcache_range + + /* --------------------------------------------- + * Calculate and store the size of the unused + * stack memory in r1. Calculate and store the + * stack base address in r0. + * --------------------------------------------- + */ + sub r0, r4, #PLATFORM_STACK_SIZE + sub r1, sp, r0 + bl inv_dcache_range + + pop {r4, pc} +endfunc do_stack_maintenance + +/* ----------------------------------------------------------------------- + * This function is called to indicate to the power controller that it + * is safe to power down this cpu. It should not exit the wfi and will + * be released from reset upon power up. + * ----------------------------------------------------------------------- + */ +func psci_power_down_wfi + dsb sy // ensure write buffer empty + wfi + no_ret plat_panic_handler +endfunc psci_power_down_wfi diff --git a/lib/psci/aarch64/psci_helpers.S b/lib/psci/aarch64/psci_helpers.S new file mode 100644 index 00000000..afe21ebe --- /dev/null +++ b/lib/psci/aarch64/psci_helpers.S @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <asm_macros.S> +#include <assert_macros.S> +#include <platform_def.h> +#include <psci.h> + + .globl psci_do_pwrdown_cache_maintenance + .globl psci_do_pwrup_cache_maintenance + .globl psci_power_down_wfi +#if !ERROR_DEPRECATED + .globl psci_entrypoint +#endif + +/* ----------------------------------------------------------------------- + * void psci_do_pwrdown_cache_maintenance(unsigned int power level); + * + * This function performs cache maintenance for the specified power + * level. The levels of cache affected are determined by the power + * level which is passed as the argument i.e. level 0 results + * in a flush of the L1 cache. Both the L1 and L2 caches are flushed + * for a higher power level. + * + * Additionally, this function also ensures that stack memory is correctly + * flushed out to avoid coherency issues due to a change in its memory + * attributes after the data cache is disabled. + * ----------------------------------------------------------------------- + */ +func psci_do_pwrdown_cache_maintenance + stp x29, x30, [sp,#-16]! + stp x19, x20, [sp,#-16]! 
+ + /* --------------------------------------------- + * Invoke CPU-specific power down operations for + * the appropriate level + * --------------------------------------------- + */ + bl prepare_cpu_pwr_dwn + + /* --------------------------------------------- + * Do stack maintenance by flushing the used + * stack to the main memory and invalidating the + * remainder. + * --------------------------------------------- + */ + bl plat_get_my_stack + + /* --------------------------------------------- + * Calculate and store the size of the used + * stack memory in x1. + * --------------------------------------------- + */ + mov x19, x0 + mov x1, sp + sub x1, x0, x1 + mov x0, sp + bl flush_dcache_range + + /* --------------------------------------------- + * Calculate and store the size of the unused + * stack memory in x1. Calculate and store the + * stack base address in x0. + * --------------------------------------------- + */ + sub x0, x19, #PLATFORM_STACK_SIZE + sub x1, sp, x0 + bl inv_dcache_range + + ldp x19, x20, [sp], #16 + ldp x29, x30, [sp], #16 + ret +endfunc psci_do_pwrdown_cache_maintenance + + +/* ----------------------------------------------------------------------- + * void psci_do_pwrup_cache_maintenance(void); + * + * This function performs cache maintenance after this cpu is powered up. + * Currently, this involves managing the used stack memory before turning + * on the data cache. + * ----------------------------------------------------------------------- + */ +func psci_do_pwrup_cache_maintenance + stp x29, x30, [sp,#-16]! + + /* --------------------------------------------- + * Ensure any inflight stack writes have made it + * to main memory. + * --------------------------------------------- + */ + dmb st + + /* --------------------------------------------- + * Calculate and store the size of the used + * stack memory in x1. Calculate and store the + * stack base address in x0. + * --------------------------------------------- + */ + bl plat_get_my_stack + mov x1, sp + sub x1, x0, x1 + mov x0, sp + bl inv_dcache_range + + /* --------------------------------------------- + * Enable the data cache. + * --------------------------------------------- + */ + mrs x0, sctlr_el3 + orr x0, x0, #SCTLR_C_BIT + msr sctlr_el3, x0 + isb + + ldp x29, x30, [sp], #16 + ret +endfunc psci_do_pwrup_cache_maintenance + +/* ----------------------------------------------------------------------- + * void psci_power_down_wfi(void); + * This function is called to indicate to the power controller that it + * is safe to power down this cpu. It should not exit the wfi and will + * be released from reset upon power up. + * ----------------------------------------------------------------------- + */ +func psci_power_down_wfi + dsb sy // ensure write buffer empty + wfi + no_ret plat_panic_handler +endfunc psci_power_down_wfi + +/* ----------------------------------------------------------------------- + * void psci_entrypoint(void); + * The deprecated entry point for PSCI on warm boot for AArch64. + * ----------------------------------------------------------------------- + */ +func_deprecated psci_entrypoint + b bl31_warm_entrypoint +endfunc_deprecated psci_entrypoint diff --git a/lib/psci/psci_common.c b/lib/psci/psci_common.c new file mode 100644 index 00000000..4502c24b --- /dev/null +++ b/lib/psci/psci_common.c @@ -0,0 +1,1029 @@ +/* + * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch.h> +#include <arch_helpers.h> +#include <assert.h> +#include <bl_common.h> +#include <context.h> +#include <context_mgmt.h> +#include <debug.h> +#include <platform.h> +#include <string.h> +#include <utils.h> +#include "psci_private.h" + +/* + * SPD power management operations, expected to be supplied by the registered + * SPD on successful SP initialization + */ +const spd_pm_ops_t *psci_spd_pm; + +/* + * PSCI requested local power state map. This array is used to store the local + * power states requested by a CPU for power levels from level 1 to + * PLAT_MAX_PWR_LVL. It does not store the requested local power state for power + * level 0 (PSCI_CPU_PWR_LVL) as the requested and the target power state for a + * CPU are the same. + * + * During state coordination, the platform is passed an array containing the + * local states requested for a particular non cpu power domain by each cpu + * within the domain. + * + * TODO: Dense packing of the requested states will cause cache thrashing + * when multiple power domains write to it. If we allocate the requested + * states at each power level in a cache-line aligned per-domain memory, + * the cache thrashing can be avoided. + */ +static plat_local_state_t + psci_req_local_pwr_states[PLAT_MAX_PWR_LVL][PLATFORM_CORE_COUNT]; + + +/******************************************************************************* + * Arrays that hold the platform's power domain tree information for state + * management of power domains. + * Each node in the array 'psci_non_cpu_pd_nodes' corresponds to a power domain + * which is an ancestor of a CPU power domain. + * Each node in the array 'psci_cpu_pd_nodes' corresponds to a cpu power domain + ******************************************************************************/ +non_cpu_pd_node_t psci_non_cpu_pd_nodes[PSCI_NUM_NON_CPU_PWR_DOMAINS] +#if USE_COHERENT_MEM +__section("tzfw_coherent_mem") +#endif +; + +/* Lock for PSCI state coordination */ +DEFINE_PSCI_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]); + +cpu_pd_node_t psci_cpu_pd_nodes[PLATFORM_CORE_COUNT]; + +/******************************************************************************* + * Pointer to functions exported by the platform to complete power mgmt. ops + ******************************************************************************/ +const plat_psci_ops_t *psci_plat_pm_ops; + +/****************************************************************************** + * Check that the maximum power level supported by the platform makes sense + *****************************************************************************/ +CASSERT(PLAT_MAX_PWR_LVL <= PSCI_MAX_PWR_LVL && \ + PLAT_MAX_PWR_LVL >= PSCI_CPU_PWR_LVL, \ + assert_platform_max_pwrlvl_check); + +/* + * The plat_local_state used by the platform is one of these types: RUN, + * RETENTION and OFF. The platform can define further sub-states for each type + * apart from RUN. This categorization is done to verify the sanity of the + * psci_power_state passed by the platform and to print debug information. The + * categorization is done on the basis of the following conditions: + * + * 1. If (plat_local_state == 0) then the category is STATE_TYPE_RUN. + * + * 2. If (0 < plat_local_state <= PLAT_MAX_RET_STATE), then the category is + * STATE_TYPE_RETN. + * + * 3. If (plat_local_state > PLAT_MAX_RET_STATE), then the category is + * STATE_TYPE_OFF. 
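
As a concrete illustration of these three rules: with assumed platform values PLAT_MAX_RET_STATE = 1 and PLAT_MAX_OFF_STATE = 2, state 0 is RUN, state 1 is RETENTION and state 2 is OFF. A tiny self-contained sketch of the categorization that the enum and macro defined next implement (names prefixed DEMO_ to avoid clashing with the real definitions):

	#include <assert.h>

	enum { DEMO_RUN = 0, DEMO_RETN, DEMO_OFF };

	#define DEMO_MAX_RET_STATE	1	/* assumed platform value */

	#define demo_state_type(s) \
		((s) ? (((s) > DEMO_MAX_RET_STATE) ? DEMO_OFF : DEMO_RETN) \
		     : DEMO_RUN)

	static void categorization_demo(void)
	{
		assert(demo_state_type(0) == DEMO_RUN);
		assert(demo_state_type(1) == DEMO_RETN);
		assert(demo_state_type(2) == DEMO_OFF);
	}
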
+ */ +typedef enum plat_local_state_type { + STATE_TYPE_RUN = 0, + STATE_TYPE_RETN, + STATE_TYPE_OFF +} plat_local_state_type_t; + +/* The macro used to categorize plat_local_state. */ +#define find_local_state_type(plat_local_state) \ + ((plat_local_state) ? ((plat_local_state > PLAT_MAX_RET_STATE) \ + ? STATE_TYPE_OFF : STATE_TYPE_RETN) \ + : STATE_TYPE_RUN) + +/****************************************************************************** + * Check that the maximum retention level supported by the platform is less + * than the maximum off level. + *****************************************************************************/ +CASSERT(PLAT_MAX_RET_STATE < PLAT_MAX_OFF_STATE, \ + assert_platform_max_off_and_retn_state_check); + +/****************************************************************************** + * This function ensures that the power state parameter in a CPU_SUSPEND request + * is valid. If so, it returns the requested states for each power level. + *****************************************************************************/ +int psci_validate_power_state(unsigned int power_state, + psci_power_state_t *state_info) +{ + /* Check SBZ bits in power state are zero */ + if (psci_check_power_state(power_state)) + return PSCI_E_INVALID_PARAMS; + + assert(psci_plat_pm_ops->validate_power_state); + + /* Validate the power_state using platform pm_ops */ + return psci_plat_pm_ops->validate_power_state(power_state, state_info); +} + +/****************************************************************************** + * This function retrieves the `psci_power_state_t` for system suspend from + * the platform. + *****************************************************************************/ +void psci_query_sys_suspend_pwrstate(psci_power_state_t *state_info) +{ + /* + * Assert that the required pm_ops hook is implemented to ensure that + * the capability detected during psci_setup() is valid. + */ + assert(psci_plat_pm_ops->get_sys_suspend_power_state); + + /* + * Query the platform for the power_state required for system suspend + */ + psci_plat_pm_ops->get_sys_suspend_power_state(state_info); +} + +/******************************************************************************* + * This function verifies that the all the other cores in the system have been + * turned OFF and the current CPU is the last running CPU in the system. + * Returns 1 (true) if the current CPU is the last ON CPU or 0 (false) + * otherwise. + ******************************************************************************/ +unsigned int psci_is_last_on_cpu(void) +{ + unsigned int cpu_idx, my_idx = plat_my_core_pos(); + + for (cpu_idx = 0; cpu_idx < PLATFORM_CORE_COUNT; cpu_idx++) { + if (cpu_idx == my_idx) { + assert(psci_get_aff_info_state() == AFF_STATE_ON); + continue; + } + + if (psci_get_aff_info_state_by_idx(cpu_idx) != AFF_STATE_OFF) + return 0; + } + + return 1; +} + +/******************************************************************************* + * Routine to return the maximum power level to traverse to after a cpu has + * been physically powered up. It is expected to be called immediately after + * reset from assembler code. + ******************************************************************************/ +static unsigned int get_power_on_target_pwrlvl(void) +{ + unsigned int pwrlvl; + + /* + * Assume that this cpu was suspended and retrieve its target power + * level. If it is invalid then it could only have been turned off + * earlier. 
+/******************************************************************************
+ * This function ensures that the power state parameter in a CPU_SUSPEND
+ * request is valid. If so, it returns the requested states for each power
+ * level.
+ *****************************************************************************/
+int psci_validate_power_state(unsigned int power_state,
+			      psci_power_state_t *state_info)
+{
+	/* Check that the SBZ bits in the power state are zero */
+	if (psci_check_power_state(power_state))
+		return PSCI_E_INVALID_PARAMS;
+
+	assert(psci_plat_pm_ops->validate_power_state);
+
+	/* Validate the power_state using platform pm_ops */
+	return psci_plat_pm_ops->validate_power_state(power_state, state_info);
+}
+
+/******************************************************************************
+ * This function retrieves the `psci_power_state_t` for system suspend from
+ * the platform.
+ *****************************************************************************/
+void psci_query_sys_suspend_pwrstate(psci_power_state_t *state_info)
+{
+	/*
+	 * Assert that the required pm_ops hook is implemented to ensure that
+	 * the capability detected during psci_setup() is valid.
+	 */
+	assert(psci_plat_pm_ops->get_sys_suspend_power_state);
+
+	/*
+	 * Query the platform for the power_state required for system suspend.
+	 */
+	psci_plat_pm_ops->get_sys_suspend_power_state(state_info);
+}
+
+/*******************************************************************************
+ * This function verifies that all the other cores in the system have been
+ * turned OFF and the current CPU is the last running CPU in the system.
+ * Returns 1 (true) if the current CPU is the last ON CPU or 0 (false)
+ * otherwise.
+ ******************************************************************************/
+unsigned int psci_is_last_on_cpu(void)
+{
+	unsigned int cpu_idx, my_idx = plat_my_core_pos();
+
+	for (cpu_idx = 0; cpu_idx < PLATFORM_CORE_COUNT; cpu_idx++) {
+		if (cpu_idx == my_idx) {
+			assert(psci_get_aff_info_state() == AFF_STATE_ON);
+			continue;
+		}
+
+		if (psci_get_aff_info_state_by_idx(cpu_idx) != AFF_STATE_OFF)
+			return 0;
+	}
+
+	return 1;
+}
+
+/*******************************************************************************
+ * Routine to return the maximum power level to traverse to after a cpu has
+ * been physically powered up. It is expected to be called immediately after
+ * reset from assembler code.
+ ******************************************************************************/
+static unsigned int get_power_on_target_pwrlvl(void)
+{
+	unsigned int pwrlvl;
+
+	/*
+	 * Assume that this cpu was suspended and retrieve its target power
+	 * level. If it is invalid then it could only have been turned off
+	 * earlier. PLAT_MAX_PWR_LVL will be the highest power level a
+	 * cpu can be turned off to.
+	 */
+	pwrlvl = psci_get_suspend_pwrlvl();
+	if (pwrlvl == PSCI_INVALID_PWR_LVL)
+		pwrlvl = PLAT_MAX_PWR_LVL;
+	return pwrlvl;
+}
+
+/******************************************************************************
+ * Helper function to update the requested local power state array. This array
+ * does not store the requested state for the CPU power level. Hence an
+ * assertion is added to prevent us from accessing the wrong index.
+ *****************************************************************************/
+static void psci_set_req_local_pwr_state(unsigned int pwrlvl,
+					 unsigned int cpu_idx,
+					 plat_local_state_t req_pwr_state)
+{
+	/*
+	 * This should never happen; we have this here to avoid
+	 * "array subscript is above array bounds" errors in GCC.
+	 */
+	assert(pwrlvl > PSCI_CPU_PWR_LVL);
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"
+	psci_req_local_pwr_states[pwrlvl - 1][cpu_idx] = req_pwr_state;
+#pragma GCC diagnostic pop
+}
+
+/******************************************************************************
+ * This function initializes the psci_req_local_pwr_states.
+ *****************************************************************************/
+void psci_init_req_local_pwr_states(void)
+{
+	/* Initialize the requested state of all non-CPU power domains as OFF */
+	memset(&psci_req_local_pwr_states, PLAT_MAX_OFF_STATE,
+	       sizeof(psci_req_local_pwr_states));
+}
+
+/******************************************************************************
+ * Helper function to return a reference to an array containing the local power
+ * states requested by each cpu for a power domain at 'pwrlvl'. The size of the
+ * array will be the number of cpu power domains of which this power domain is
+ * an ancestor. These requested states will be used to determine a suitable
+ * target state for this power domain during psci state coordination. An
+ * assertion is added to prevent us from accessing the CPU power level.
+ *****************************************************************************/
+static plat_local_state_t *psci_get_req_local_pwr_states(unsigned int pwrlvl,
+							 unsigned int cpu_idx)
+{
+	assert(pwrlvl > PSCI_CPU_PWR_LVL);
+
+	return &psci_req_local_pwr_states[pwrlvl - 1][cpu_idx];
+}
+
+/*
+ * psci_non_cpu_pd_nodes can be placed either in normal memory or coherent
+ * memory.
+ *
+ * With !USE_COHERENT_MEM, psci_non_cpu_pd_nodes is placed in normal memory
+ * and is accessed by both cached and non-cached participants. To serve the
+ * common minimum, perform a cache flush before read and after write so that
+ * non-cached participants operate on the latest data in main memory.
+ *
+ * When USE_COHERENT_MEM is used, psci_non_cpu_pd_nodes is placed in coherent
+ * memory. With HW_ASSISTED_COHERENCY, all PSCI participants are cache-coherent.
+ * In both cases, no cache operations are required.
+ */
+
+/*
+ * Retrieve local state of non-CPU power domain node from a non-cached CPU,
+ * after any required cache maintenance operation.
+ */
+static plat_local_state_t get_non_cpu_pd_node_local_state(
+		unsigned int parent_idx)
+{
+#if !USE_COHERENT_MEM || !HW_ASSISTED_COHERENCY
+	flush_dcache_range(
+			(uintptr_t) &psci_non_cpu_pd_nodes[parent_idx],
+			sizeof(psci_non_cpu_pd_nodes[parent_idx]));
+#endif
+	return psci_non_cpu_pd_nodes[parent_idx].local_state;
+}
+
+/*
+ * Update local state of non-CPU power domain node from a cached CPU; perform
+ * any required cache maintenance operation afterwards.
+ */
+static void set_non_cpu_pd_node_local_state(unsigned int parent_idx,
+					    plat_local_state_t state)
+{
+	psci_non_cpu_pd_nodes[parent_idx].local_state = state;
+#if !USE_COHERENT_MEM || !HW_ASSISTED_COHERENCY
+	flush_dcache_range(
+			(uintptr_t) &psci_non_cpu_pd_nodes[parent_idx],
+			sizeof(psci_non_cpu_pd_nodes[parent_idx]));
+#endif
+}
+
+/******************************************************************************
+ * Helper function to return the current local power state of each power domain
+ * from the current cpu power domain to its ancestor at the 'end_pwrlvl'. This
+ * function will be called after a cpu is powered on to find the local state
+ * each power domain has emerged from.
+ *****************************************************************************/
+void psci_get_target_local_pwr_states(unsigned int end_pwrlvl,
+				      psci_power_state_t *target_state)
+{
+	unsigned int parent_idx, lvl;
+	plat_local_state_t *pd_state = target_state->pwr_domain_state;
+
+	pd_state[PSCI_CPU_PWR_LVL] = psci_get_cpu_local_state();
+	parent_idx = psci_cpu_pd_nodes[plat_my_core_pos()].parent_node;
+
+	/* Copy the local power state from node to state_info */
+	for (lvl = PSCI_CPU_PWR_LVL + 1; lvl <= end_pwrlvl; lvl++) {
+		pd_state[lvl] = get_non_cpu_pd_node_local_state(parent_idx);
+		parent_idx = psci_non_cpu_pd_nodes[parent_idx].parent_node;
+	}
+
+	/* Set the higher levels to RUN */
+	for (; lvl <= PLAT_MAX_PWR_LVL; lvl++)
+		target_state->pwr_domain_state[lvl] = PSCI_LOCAL_STATE_RUN;
+}
+
+/******************************************************************************
+ * Helper function to set the target local power state that each power domain
+ * from the current cpu power domain to its ancestor at the 'end_pwrlvl' will
+ * enter. This function will be called after coordination of requested power
+ * states has been done for each power level.
+ *****************************************************************************/
+static void psci_set_target_local_pwr_states(unsigned int end_pwrlvl,
+					const psci_power_state_t *target_state)
+{
+	unsigned int parent_idx, lvl;
+	const plat_local_state_t *pd_state = target_state->pwr_domain_state;
+
+	psci_set_cpu_local_state(pd_state[PSCI_CPU_PWR_LVL]);
+
+	/*
+	 * Need to flush as local_state might be accessed with the data cache
+	 * disabled during power on.
+	 */
+	psci_flush_cpu_data(psci_svc_cpu_data.local_state);
+
+	parent_idx = psci_cpu_pd_nodes[plat_my_core_pos()].parent_node;
+
+	/* Copy the local_state from state_info */
+	for (lvl = 1; lvl <= end_pwrlvl; lvl++) {
+		set_non_cpu_pd_node_local_state(parent_idx, pd_state[lvl]);
+		parent_idx = psci_non_cpu_pd_nodes[parent_idx].parent_node;
+	}
+}
+
+/*******************************************************************************
+ * PSCI helper function to get the parent nodes corresponding to a cpu_index.
+ ******************************************************************************/ +void psci_get_parent_pwr_domain_nodes(unsigned int cpu_idx, + unsigned int end_lvl, + unsigned int node_index[]) +{ + unsigned int parent_node = psci_cpu_pd_nodes[cpu_idx].parent_node; + unsigned int i; + + for (i = PSCI_CPU_PWR_LVL + 1; i <= end_lvl; i++) { + *node_index++ = parent_node; + parent_node = psci_non_cpu_pd_nodes[parent_node].parent_node; + } +} + +/****************************************************************************** + * This function is invoked post CPU power up and initialization. It sets the + * affinity info state, target power state and requested power state for the + * current CPU and all its ancestor power domains to RUN. + *****************************************************************************/ +void psci_set_pwr_domains_to_run(unsigned int end_pwrlvl) +{ + unsigned int parent_idx, cpu_idx = plat_my_core_pos(), lvl; + parent_idx = psci_cpu_pd_nodes[cpu_idx].parent_node; + + /* Reset the local_state to RUN for the non cpu power domains. */ + for (lvl = PSCI_CPU_PWR_LVL + 1; lvl <= end_pwrlvl; lvl++) { + set_non_cpu_pd_node_local_state(parent_idx, + PSCI_LOCAL_STATE_RUN); + psci_set_req_local_pwr_state(lvl, + cpu_idx, + PSCI_LOCAL_STATE_RUN); + parent_idx = psci_non_cpu_pd_nodes[parent_idx].parent_node; + } + + /* Set the affinity info state to ON */ + psci_set_aff_info_state(AFF_STATE_ON); + + psci_set_cpu_local_state(PSCI_LOCAL_STATE_RUN); + psci_flush_cpu_data(psci_svc_cpu_data); +} + +/****************************************************************************** + * This function is passed the local power states requested for each power + * domain (state_info) between the current CPU domain and its ancestors until + * the target power level (end_pwrlvl). It updates the array of requested power + * states with this information. + * + * Then, for each level (apart from the CPU level) until the 'end_pwrlvl', it + * retrieves the states requested by all the cpus of which the power domain at + * that level is an ancestor. It passes this information to the platform to + * coordinate and return the target power state. If the target state for a level + * is RUN then subsequent levels are not considered. At the CPU level, state + * coordination is not required. Hence, the requested and the target states are + * the same. + * + * The 'state_info' is updated with the target state for each level between the + * CPU and the 'end_pwrlvl' and returned to the caller. + * + * This function will only be invoked with data cache enabled and while + * powering down a core. 
+ *****************************************************************************/
+void psci_do_state_coordination(unsigned int end_pwrlvl,
+				psci_power_state_t *state_info)
+{
+	unsigned int lvl, parent_idx, cpu_idx = plat_my_core_pos();
+	unsigned int start_idx, ncpus;
+	plat_local_state_t target_state, *req_states;
+
+	assert(end_pwrlvl <= PLAT_MAX_PWR_LVL);
+	parent_idx = psci_cpu_pd_nodes[cpu_idx].parent_node;
+
+	/*
+	 * For level 0, the requested state will be equivalent to the target
+	 * state, so coordination starts from level 1.
+	 */
+	for (lvl = PSCI_CPU_PWR_LVL + 1; lvl <= end_pwrlvl; lvl++) {
+
+		/* First update the requested power state */
+		psci_set_req_local_pwr_state(lvl, cpu_idx,
+					     state_info->pwr_domain_state[lvl]);
+
+		/* Get the requested power states for this power level */
+		start_idx = psci_non_cpu_pd_nodes[parent_idx].cpu_start_idx;
+		req_states = psci_get_req_local_pwr_states(lvl, start_idx);
+
+		/*
+		 * Let the platform coordinate amongst the requested states at
+		 * this power level and return the target local power state.
+		 */
+		ncpus = psci_non_cpu_pd_nodes[parent_idx].ncpus;
+		target_state = plat_get_target_pwr_state(lvl,
+							 req_states,
+							 ncpus);
+
+		state_info->pwr_domain_state[lvl] = target_state;
+
+		/* Break early if the negotiated target power state is RUN */
+		if (is_local_state_run(state_info->pwr_domain_state[lvl]))
+			break;
+
+		parent_idx = psci_non_cpu_pd_nodes[parent_idx].parent_node;
+	}
+
+	/*
+	 * This is for cases when we break out of the above loop early because
+	 * the target power state is RUN at a power level < end_pwrlvl.
+	 * We update the requested power state from state_info and then
+	 * set the target state as RUN.
+	 */
+	for (lvl = lvl + 1; lvl <= end_pwrlvl; lvl++) {
+		psci_set_req_local_pwr_state(lvl, cpu_idx,
+					     state_info->pwr_domain_state[lvl]);
+		state_info->pwr_domain_state[lvl] = PSCI_LOCAL_STATE_RUN;
+	}
+
+	/* Update the target state in the power domain nodes */
+	psci_set_target_local_pwr_states(end_pwrlvl, state_info);
+}
+
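+/*
+ * Illustrative sketch (not part of the original change): a simple
+ * plat_get_target_pwr_state() can coordinate by picking the shallowest
+ * (numerically smallest) of the requested states, since a domain can only go
+ * as deep as every CPU within it allows. The example_* name is hypothetical;
+ * platform ports may implement more elaborate policies.
+ */
+static plat_local_state_t example_get_target_pwr_state(unsigned int lvl,
+					const plat_local_state_t *states,
+					unsigned int ncpu)
+{
+	plat_local_state_t target = PLAT_MAX_OFF_STATE;
+	unsigned int i;
+
+	assert(ncpu);
+
+	/* The shallowest request bounds how deep the whole domain may go. */
+	for (i = 0; i < ncpu; i++) {
+		if (states[i] < target)
+			target = states[i];
+	}
+
+	return target;
+}
+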
+/******************************************************************************
+ * This function validates a suspend request by making sure that if a standby
+ * state is requested then no power level is turned off and the highest power
+ * level is placed in a standby/retention state.
+ *
+ * It also ensures that the state that level X will enter is not shallower
+ * than the state that level X + 1 will enter.
+ *
+ * This validation will be enabled only for DEBUG builds as the platform is
+ * expected to perform these validations as well.
+ *****************************************************************************/
+int psci_validate_suspend_req(const psci_power_state_t *state_info,
+			      unsigned int is_power_down_state)
+{
+	unsigned int max_off_lvl, target_lvl, max_retn_lvl;
+	plat_local_state_t state;
+	plat_local_state_type_t req_state_type, deepest_state_type;
+	int i;
+
+	/* Find the target suspend power level */
+	target_lvl = psci_find_target_suspend_lvl(state_info);
+	if (target_lvl == PSCI_INVALID_PWR_LVL)
+		return PSCI_E_INVALID_PARAMS;
+
+	/* All power domain levels are in a RUN state to begin with */
+	deepest_state_type = STATE_TYPE_RUN;
+
+	for (i = target_lvl; i >= PSCI_CPU_PWR_LVL; i--) {
+		state = state_info->pwr_domain_state[i];
+		req_state_type = find_local_state_type(state);
+
+		/*
+		 * While traversing from the highest power level to the lowest,
+		 * the state requested for lower levels has to be the same or
+		 * deeper, i.e. equal to or greater than the state at the
+		 * higher levels. If this condition is true, then the requested
+		 * state becomes the deepest state encountered so far.
+		 */
+		if (req_state_type < deepest_state_type)
+			return PSCI_E_INVALID_PARAMS;
+		deepest_state_type = req_state_type;
+	}
+
+	/* Find the highest off power level */
+	max_off_lvl = psci_find_max_off_lvl(state_info);
+
+	/* The target_lvl is either equal to the max_off_lvl or max_retn_lvl */
+	max_retn_lvl = PSCI_INVALID_PWR_LVL;
+	if (target_lvl != max_off_lvl)
+		max_retn_lvl = target_lvl;
+
+	/*
+	 * If this is not a request for a power down state then the max off
+	 * level has to be invalid and the max retention level has to be a
+	 * valid power level.
+	 */
+	if (!is_power_down_state && (max_off_lvl != PSCI_INVALID_PWR_LVL ||
+				     max_retn_lvl == PSCI_INVALID_PWR_LVL))
+		return PSCI_E_INVALID_PARAMS;
+
+	return PSCI_E_SUCCESS;
+}
+
+/******************************************************************************
+ * This function finds the highest power level which will be powered down
+ * amongst all the power levels specified in the 'state_info' structure.
+ *****************************************************************************/
+unsigned int psci_find_max_off_lvl(const psci_power_state_t *state_info)
+{
+	int i;
+
+	for (i = PLAT_MAX_PWR_LVL; i >= PSCI_CPU_PWR_LVL; i--) {
+		if (is_local_state_off(state_info->pwr_domain_state[i]))
+			return i;
+	}
+
+	return PSCI_INVALID_PWR_LVL;
+}
+
+/******************************************************************************
+ * This function finds the level of the highest power domain which will be
+ * placed in a low power state during a suspend operation.
+ *****************************************************************************/
+unsigned int psci_find_target_suspend_lvl(const psci_power_state_t *state_info)
+{
+	int i;
+
+	for (i = PLAT_MAX_PWR_LVL; i >= PSCI_CPU_PWR_LVL; i--) {
+		if (!is_local_state_run(state_info->pwr_domain_state[i]))
+			return i;
+	}
+
+	return PSCI_INVALID_PWR_LVL;
+}
+
+/*******************************************************************************
+ * This function is passed a cpu_index and the highest level in the topology
+ * tree that the operation should be applied to. It picks up locks in order of
+ * increasing power domain level in the range specified.
+ ******************************************************************************/
+void psci_acquire_pwr_domain_locks(unsigned int end_pwrlvl,
+				   unsigned int cpu_idx)
+{
+	unsigned int parent_idx = psci_cpu_pd_nodes[cpu_idx].parent_node;
+	unsigned int level;
+
+	/* No locking required for level 0. Hence start locking from level 1 */
+	for (level = PSCI_CPU_PWR_LVL + 1; level <= end_pwrlvl; level++) {
+		psci_lock_get(&psci_non_cpu_pd_nodes[parent_idx]);
+		parent_idx = psci_non_cpu_pd_nodes[parent_idx].parent_node;
+	}
+}
+
+/*******************************************************************************
+ * This function is passed a cpu_index and the highest level in the topology
+ * tree that the operation should be applied to. It releases the locks in order
+ * of decreasing power domain level in the range specified.
+ ******************************************************************************/
+void psci_release_pwr_domain_locks(unsigned int end_pwrlvl,
+				   unsigned int cpu_idx)
+{
+	unsigned int parent_idx, parent_nodes[PLAT_MAX_PWR_LVL] = {0};
+	int level;
+
+	/* Get the parent nodes */
+	psci_get_parent_pwr_domain_nodes(cpu_idx, end_pwrlvl, parent_nodes);
+
+	/* Unlock top down. No unlocking required for level 0.
*/ + for (level = end_pwrlvl; level >= PSCI_CPU_PWR_LVL + 1; level--) { + parent_idx = parent_nodes[level - 1]; + psci_lock_release(&psci_non_cpu_pd_nodes[parent_idx]); + } +} + +/******************************************************************************* + * Simple routine to determine whether a mpidr is valid or not. + ******************************************************************************/ +int psci_validate_mpidr(u_register_t mpidr) +{ + if (plat_core_pos_by_mpidr(mpidr) < 0) + return PSCI_E_INVALID_PARAMS; + + return PSCI_E_SUCCESS; +} + +/******************************************************************************* + * This function determines the full entrypoint information for the requested + * PSCI entrypoint on power on/resume and returns it. + ******************************************************************************/ +#ifdef AARCH32 +static int psci_get_ns_ep_info(entry_point_info_t *ep, + uintptr_t entrypoint, + u_register_t context_id) +{ + u_register_t ep_attr; + unsigned int aif, ee, mode; + u_register_t scr = read_scr(); + u_register_t ns_sctlr, sctlr; + + /* Switch to non secure state */ + write_scr(scr | SCR_NS_BIT); + isb(); + ns_sctlr = read_sctlr(); + + sctlr = scr & SCR_HCE_BIT ? read_hsctlr() : ns_sctlr; + + /* Return to original state */ + write_scr(scr); + isb(); + ee = 0; + + ep_attr = NON_SECURE | EP_ST_DISABLE; + if (sctlr & SCTLR_EE_BIT) { + ep_attr |= EP_EE_BIG; + ee = 1; + } + SET_PARAM_HEAD(ep, PARAM_EP, VERSION_1, ep_attr); + + ep->pc = entrypoint; + zeromem(&ep->args, sizeof(ep->args)); + ep->args.arg0 = context_id; + + mode = scr & SCR_HCE_BIT ? MODE32_hyp : MODE32_svc; + + /* + * TODO: Choose async. exception bits if HYP mode is not + * implemented according to the values of SCR.{AW, FW} bits + */ + aif = SPSR_ABT_BIT | SPSR_IRQ_BIT | SPSR_FIQ_BIT; + + ep->spsr = SPSR_MODE32(mode, entrypoint & 0x1, ee, aif); + + return PSCI_E_SUCCESS; +} + +#else +static int psci_get_ns_ep_info(entry_point_info_t *ep, + uintptr_t entrypoint, + u_register_t context_id) +{ + u_register_t ep_attr, sctlr; + unsigned int daif, ee, mode; + u_register_t ns_scr_el3 = read_scr_el3(); + u_register_t ns_sctlr_el1 = read_sctlr_el1(); + + sctlr = ns_scr_el3 & SCR_HCE_BIT ? read_sctlr_el2() : ns_sctlr_el1; + ee = 0; + + ep_attr = NON_SECURE | EP_ST_DISABLE; + if (sctlr & SCTLR_EE_BIT) { + ep_attr |= EP_EE_BIG; + ee = 1; + } + SET_PARAM_HEAD(ep, PARAM_EP, VERSION_1, ep_attr); + + ep->pc = entrypoint; + zeromem(&ep->args, sizeof(ep->args)); + ep->args.arg0 = context_id; + + /* + * Figure out whether the cpu enters the non-secure address space + * in aarch32 or aarch64 + */ + if (ns_scr_el3 & SCR_RW_BIT) { + + /* + * Check whether a Thumb entry point has been provided for an + * aarch64 EL + */ + if (entrypoint & 0x1) + return PSCI_E_INVALID_ADDRESS; + + mode = ns_scr_el3 & SCR_HCE_BIT ? MODE_EL2 : MODE_EL1; + + ep->spsr = SPSR_64(mode, MODE_SP_ELX, DISABLE_ALL_EXCEPTIONS); + } else { + + mode = ns_scr_el3 & SCR_HCE_BIT ? MODE32_hyp : MODE32_svc; + + /* + * TODO: Choose async. 
exception bits if HYP mode is not + * implemented according to the values of SCR.{AW, FW} bits + */ + daif = DAIF_ABT_BIT | DAIF_IRQ_BIT | DAIF_FIQ_BIT; + + ep->spsr = SPSR_MODE32(mode, entrypoint & 0x1, ee, daif); + } + + return PSCI_E_SUCCESS; +} +#endif + +/******************************************************************************* + * This function validates the entrypoint with the platform layer if the + * appropriate pm_ops hook is exported by the platform and returns the + * 'entry_point_info'. + ******************************************************************************/ +int psci_validate_entry_point(entry_point_info_t *ep, + uintptr_t entrypoint, + u_register_t context_id) +{ + int rc; + + /* Validate the entrypoint using platform psci_ops */ + if (psci_plat_pm_ops->validate_ns_entrypoint) { + rc = psci_plat_pm_ops->validate_ns_entrypoint(entrypoint); + if (rc != PSCI_E_SUCCESS) + return PSCI_E_INVALID_ADDRESS; + } + + /* + * Verify and derive the re-entry information for + * the non-secure world from the non-secure state from + * where this call originated. + */ + rc = psci_get_ns_ep_info(ep, entrypoint, context_id); + return rc; +} + +/******************************************************************************* + * Generic handler which is called when a cpu is physically powered on. It + * traverses the node information and finds the highest power level powered + * off and performs generic, architectural, platform setup and state management + * to power on that power level and power levels below it. + * e.g. For a cpu that's been powered on, it will call the platform specific + * code to enable the gic cpu interface and for a cluster it will enable + * coherency at the interconnect level in addition to gic cpu interface. + ******************************************************************************/ +void psci_warmboot_entrypoint(void) +{ + unsigned int end_pwrlvl, cpu_idx = plat_my_core_pos(); + psci_power_state_t state_info = { {PSCI_LOCAL_STATE_RUN} }; + + /* + * Verify that we have been explicitly turned ON or resumed from + * suspend. + */ + if (psci_get_aff_info_state() == AFF_STATE_OFF) { + ERROR("Unexpected affinity info state"); + panic(); + } + + /* + * Get the maximum power domain level to traverse to after this cpu + * has been physically powered up. + */ + end_pwrlvl = get_power_on_target_pwrlvl(); + + /* + * This function acquires the lock corresponding to each power level so + * that by the time all locks are taken, the system topology is snapshot + * and state management can be done safely. + */ + psci_acquire_pwr_domain_locks(end_pwrlvl, + cpu_idx); + +#if ENABLE_PSCI_STAT + plat_psci_stat_accounting_stop(&state_info); +#endif + + psci_get_target_local_pwr_states(end_pwrlvl, &state_info); + + /* + * This CPU could be resuming from suspend or it could have just been + * turned on. To distinguish between these 2 cases, we examine the + * affinity state of the CPU: + * - If the affinity state is ON_PENDING then it has just been + * turned on. + * - Else it is resuming from suspend. + * + * Depending on the type of warm reset identified, choose the right set + * of power management handler and perform the generic, architecture + * and platform specific handling. + */ + if (psci_get_aff_info_state() == AFF_STATE_ON_PENDING) + psci_cpu_on_finish(cpu_idx, &state_info); + else + psci_cpu_suspend_finish(cpu_idx, &state_info); + + /* + * Set the requested and target state of this CPU and all the higher + * power domains which are ancestors of this CPU to run. 
+ */ + psci_set_pwr_domains_to_run(end_pwrlvl); + +#if ENABLE_PSCI_STAT + /* + * Update PSCI stats. + * Caches are off when writing stats data on the power down path. + * Since caches are now enabled, it's necessary to do cache + * maintenance before reading that same data. + */ + psci_stats_update_pwr_up(end_pwrlvl, &state_info); +#endif + + /* + * This loop releases the lock corresponding to each power level + * in the reverse order to which they were acquired. + */ + psci_release_pwr_domain_locks(end_pwrlvl, + cpu_idx); +} + +/******************************************************************************* + * This function initializes the set of hooks that PSCI invokes as part of power + * management operation. The power management hooks are expected to be provided + * by the SPD, after it finishes all its initialization + ******************************************************************************/ +void psci_register_spd_pm_hook(const spd_pm_ops_t *pm) +{ + assert(pm); + psci_spd_pm = pm; + + if (pm->svc_migrate) + psci_caps |= define_psci_cap(PSCI_MIG_AARCH64); + + if (pm->svc_migrate_info) + psci_caps |= define_psci_cap(PSCI_MIG_INFO_UP_CPU_AARCH64) + | define_psci_cap(PSCI_MIG_INFO_TYPE); +} + +/******************************************************************************* + * This function invokes the migrate info hook in the spd_pm_ops. It performs + * the necessary return value validation. If the Secure Payload is UP and + * migrate capable, it returns the mpidr of the CPU on which the Secure payload + * is resident through the mpidr parameter. Else the value of the parameter on + * return is undefined. + ******************************************************************************/ +int psci_spd_migrate_info(u_register_t *mpidr) +{ + int rc; + + if (!psci_spd_pm || !psci_spd_pm->svc_migrate_info) + return PSCI_E_NOT_SUPPORTED; + + rc = psci_spd_pm->svc_migrate_info(mpidr); + + assert(rc == PSCI_TOS_UP_MIG_CAP || rc == PSCI_TOS_NOT_UP_MIG_CAP \ + || rc == PSCI_TOS_NOT_PRESENT_MP || rc == PSCI_E_NOT_SUPPORTED); + + return rc; +} + + +/******************************************************************************* + * This function prints the state of all power domains present in the + * system + ******************************************************************************/ +void psci_print_power_domain_map(void) +{ +#if LOG_LEVEL >= LOG_LEVEL_INFO + unsigned int idx; + plat_local_state_t state; + plat_local_state_type_t state_type; + + /* This array maps to the PSCI_STATE_X definitions in psci.h */ + static const char * const psci_state_type_str[] = { + "ON", + "RETENTION", + "OFF", + }; + + INFO("PSCI Power Domain Map:\n"); + for (idx = 0; idx < (PSCI_NUM_PWR_DOMAINS - PLATFORM_CORE_COUNT); + idx++) { + state_type = find_local_state_type( + psci_non_cpu_pd_nodes[idx].local_state); + INFO(" Domain Node : Level %u, parent_node %d," + " State %s (0x%x)\n", + psci_non_cpu_pd_nodes[idx].level, + psci_non_cpu_pd_nodes[idx].parent_node, + psci_state_type_str[state_type], + psci_non_cpu_pd_nodes[idx].local_state); + } + + for (idx = 0; idx < PLATFORM_CORE_COUNT; idx++) { + state = psci_get_cpu_local_state_by_idx(idx); + state_type = find_local_state_type(state); + INFO(" CPU Node : MPID 0x%llx, parent_node %d," + " State %s (0x%x)\n", + (unsigned long long)psci_cpu_pd_nodes[idx].mpidr, + psci_cpu_pd_nodes[idx].parent_node, + psci_state_type_str[state_type], + psci_get_cpu_local_state_by_idx(idx)); + } +#endif +} + 
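+/*
+ * Illustrative sketch (not part of the original change): an SPD typically
+ * registers its power management hooks once its Secure Payload has finished
+ * initializing. All example_spd_* names below are hypothetical.
+ */
+static int32_t example_spd_cpu_off_handler(u_register_t unused)
+{
+	/* Return 0 to allow the power down; a PSCI error code to refuse it. */
+	return 0;
+}
+
+static const spd_pm_ops_t example_spd_pm_ops = {
+	.svc_off = example_spd_cpu_off_handler,
+};
+
+static void example_spd_setup_done(void)
+{
+	psci_register_spd_pm_hook(&example_spd_pm_ops);
+}
+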
+/******************************************************************************
+ * Return whether any secondaries were powered up with a CPU_ON call. A CPU
+ * that has ever been powered up would have set its MPIDR value to something
+ * other than PSCI_INVALID_MPIDR. Note that the MPIDR isn't reset back to
+ * PSCI_INVALID_MPIDR when a CPU is powered down later, so the return value is
+ * meaningful only when called on the primary CPU during early boot.
+ *****************************************************************************/
+int psci_secondaries_brought_up(void)
+{
+	unsigned int idx, n_valid = 0;
+
+	for (idx = 0; idx < ARRAY_SIZE(psci_cpu_pd_nodes); idx++) {
+		if (psci_cpu_pd_nodes[idx].mpidr != PSCI_INVALID_MPIDR)
+			n_valid++;
+	}
+
+	assert(n_valid);
+
+	return (n_valid > 1);
+}
+
+#if ENABLE_PLAT_COMPAT
+/*******************************************************************************
+ * PSCI Compatibility helper function to return the 'power_state' parameter of
+ * the PSCI CPU SUSPEND request for the current CPU. Returns PSCI_INVALID_DATA
+ * if not invoked within CPU_SUSPEND for the current CPU.
+ ******************************************************************************/
+int psci_get_suspend_powerstate(void)
+{
+	/* Sanity check to verify that the CPU is within CPU_SUSPEND */
+	if (psci_get_aff_info_state() == AFF_STATE_ON &&
+	    !is_local_state_run(psci_get_cpu_local_state()))
+		return psci_power_state_compat[plat_my_core_pos()];
+
+	return PSCI_INVALID_DATA;
+}
+
+/*******************************************************************************
+ * PSCI Compatibility helper function to return the state id of the current
+ * cpu encoded in the 'power_state' parameter. Returns PSCI_INVALID_DATA
+ * if not invoked within CPU_SUSPEND for the current CPU.
+ ******************************************************************************/
+int psci_get_suspend_stateid(void)
+{
+	unsigned int power_state;
+	power_state = psci_get_suspend_powerstate();
+	if (power_state != PSCI_INVALID_DATA)
+		return psci_get_pstate_id(power_state);
+
+	return PSCI_INVALID_DATA;
+}
+
+/*******************************************************************************
+ * PSCI Compatibility helper function to return the state id encoded in the
+ * 'power_state' parameter of the CPU specified by 'mpidr'. Returns
+ * PSCI_INVALID_DATA if the CPU is not in CPU_SUSPEND.
+ ******************************************************************************/
+int psci_get_suspend_stateid_by_mpidr(unsigned long mpidr)
+{
+	int cpu_idx = plat_core_pos_by_mpidr(mpidr);
+
+	if (cpu_idx == -1)
+		return PSCI_INVALID_DATA;
+
+	/* Sanity check to verify that the CPU is in CPU_SUSPEND */
+	if (psci_get_aff_info_state_by_idx(cpu_idx) == AFF_STATE_ON &&
+	    !is_local_state_run(psci_get_cpu_local_state_by_idx(cpu_idx)))
+		return psci_get_pstate_id(psci_power_state_compat[cpu_idx]);
+
+	return PSCI_INVALID_DATA;
+}
+
+/*******************************************************************************
+ * This function returns the highest affinity level which is in OFF
+ * state. The affinity instance with which the level is associated is
+ * determined by the caller.
+ ******************************************************************************/ +unsigned int psci_get_max_phys_off_afflvl(void) +{ + psci_power_state_t state_info; + + zeromem(&state_info, sizeof(state_info)); + psci_get_target_local_pwr_states(PLAT_MAX_PWR_LVL, &state_info); + + return psci_find_target_suspend_lvl(&state_info); +} + +/******************************************************************************* + * PSCI Compatibility helper function to return target affinity level requested + * for the CPU_SUSPEND. This function assumes affinity levels correspond to + * power domain levels on the platform. + ******************************************************************************/ +int psci_get_suspend_afflvl(void) +{ + return psci_get_suspend_pwrlvl(); +} + +#endif + +/******************************************************************************* + * Initiate power down sequence, by calling power down operations registered for + * this CPU. + ******************************************************************************/ +void psci_do_pwrdown_sequence(unsigned int power_level) +{ +#if HW_ASSISTED_COHERENCY + /* + * With hardware-assisted coherency, the CPU drivers only initiate the + * power down sequence, without performing cache-maintenance operations + * in software. Data caches and MMU remain enabled both before and after + * this call. + */ + prepare_cpu_pwr_dwn(power_level); +#else + /* + * Without hardware-assisted coherency, the CPU drivers disable data + * caches and MMU, then perform cache-maintenance operations in + * software. + * + * We ought to call prepare_cpu_pwr_dwn() to initiate power down + * sequence. We currently have data caches and MMU enabled, but the + * function will return with data caches and MMU disabled. We must + * ensure that the stack memory is flushed out to memory before we start + * popping from it again. + */ + psci_do_pwrdown_cache_maintenance(power_level); +#endif +} diff --git a/lib/psci/psci_lib.mk b/lib/psci/psci_lib.mk new file mode 100644 index 00000000..1d4aac4a --- /dev/null +++ b/lib/psci/psci_lib.mk @@ -0,0 +1,35 @@ +# +# Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +# + +PSCI_LIB_SOURCES := lib/el3_runtime/cpu_data_array.c \ + lib/el3_runtime/${ARCH}/cpu_data.S \ + lib/el3_runtime/${ARCH}/context_mgmt.c \ + lib/cpus/${ARCH}/cpu_helpers.S \ + lib/cpus/errata_report.c \ + lib/locks/exclusive/${ARCH}/spinlock.S \ + lib/psci/psci_off.c \ + lib/psci/psci_on.c \ + lib/psci/psci_suspend.c \ + lib/psci/psci_common.c \ + lib/psci/psci_main.c \ + lib/psci/psci_setup.c \ + lib/psci/psci_system_off.c \ + lib/psci/psci_mem_protect.c \ + lib/psci/${ARCH}/psci_helpers.S + +ifeq (${ARCH}, aarch64) +PSCI_LIB_SOURCES += lib/el3_runtime/aarch64/context.S +endif + +ifeq (${USE_COHERENT_MEM}, 1) +PSCI_LIB_SOURCES += lib/locks/bakery/bakery_lock_coherent.c +else +PSCI_LIB_SOURCES += lib/locks/bakery/bakery_lock_normal.c +endif + +ifeq (${ENABLE_PSCI_STAT}, 1) +PSCI_LIB_SOURCES += lib/psci/psci_stat.c +endif diff --git a/lib/psci/psci_main.c b/lib/psci/psci_main.c new file mode 100644 index 00000000..4105e63b --- /dev/null +++ b/lib/psci/psci_main.c @@ -0,0 +1,471 @@ +/* + * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. 
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <arch_helpers.h>
+#include <assert.h>
+#include <debug.h>
+#include <platform.h>
+#include <pmf.h>
+#include <runtime_instr.h>
+#include <smcc.h>
+#include <string.h>
+#include "psci_private.h"
+
+/*******************************************************************************
+ * PSCI frontend API for servicing SMCs. Described in the PSCI spec.
+ ******************************************************************************/
+int psci_cpu_on(u_register_t target_cpu,
+		uintptr_t entrypoint,
+		u_register_t context_id)
+{
+	int rc;
+	entry_point_info_t ep;
+
+	/* Determine if the cpu exists or not */
+	rc = psci_validate_mpidr(target_cpu);
+	if (rc != PSCI_E_SUCCESS)
+		return PSCI_E_INVALID_PARAMS;
+
+	/* Validate the entry point and get the entry_point_info */
+	rc = psci_validate_entry_point(&ep, entrypoint, context_id);
+	if (rc != PSCI_E_SUCCESS)
+		return rc;
+
+	/*
+	 * To turn this cpu on, specify which power
+	 * levels need to be turned on.
+	 */
+	return psci_cpu_on_start(target_cpu, &ep);
+}
+
+unsigned int psci_version(void)
+{
+	return PSCI_MAJOR_VER | PSCI_MINOR_VER;
+}
+
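+/*
+ * Illustrative sketch (not part of the original change): per the PSCI spec,
+ * callers decode the value returned above with the major version in
+ * bits[31:16] and the minor version in bits[15:0]. The example_* helper is
+ * hypothetical.
+ */
+static void example_log_psci_version(void)
+{
+	unsigned int ver = psci_version();
+
+	INFO("PSCI v%u.%u\n", (ver >> 16) & 0xffffU, ver & 0xffffU);
+}
+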
+ */ + if (is_power_down_state) { + rc = psci_validate_entry_point(&ep, entrypoint, context_id); + if (rc != PSCI_E_SUCCESS) + return rc; + } + + /* + * Do what is needed to enter the power down state. Upon success, + * enter the final wfi which will power down this CPU. This function + * might return if the power down was abandoned for any reason, e.g. + * arrival of an interrupt + */ + psci_cpu_suspend_start(&ep, + target_pwrlvl, + &state_info, + is_power_down_state); + + return PSCI_E_SUCCESS; +} + + +int psci_system_suspend(uintptr_t entrypoint, u_register_t context_id) +{ + int rc; + psci_power_state_t state_info; + entry_point_info_t ep; + + /* Check if the current CPU is the last ON CPU in the system */ + if (!psci_is_last_on_cpu()) + return PSCI_E_DENIED; + + /* Validate the entry point and get the entry_point_info */ + rc = psci_validate_entry_point(&ep, entrypoint, context_id); + if (rc != PSCI_E_SUCCESS) + return rc; + + /* Query the psci_power_state for system suspend */ + psci_query_sys_suspend_pwrstate(&state_info); + + /* Ensure that the psci_power_state makes sense */ + assert(psci_find_target_suspend_lvl(&state_info) == PLAT_MAX_PWR_LVL); + assert(psci_validate_suspend_req(&state_info, PSTATE_TYPE_POWERDOWN) + == PSCI_E_SUCCESS); + assert(is_local_state_off(state_info.pwr_domain_state[PLAT_MAX_PWR_LVL])); + + /* + * Do what is needed to enter the system suspend state. This function + * might return if the power down was abandoned for any reason, e.g. + * arrival of an interrupt + */ + psci_cpu_suspend_start(&ep, + PLAT_MAX_PWR_LVL, + &state_info, + PSTATE_TYPE_POWERDOWN); + + return PSCI_E_SUCCESS; +} + +int psci_cpu_off(void) +{ + int rc; + unsigned int target_pwrlvl = PLAT_MAX_PWR_LVL; + + /* + * Do what is needed to power off this CPU and possible higher power + * levels if it able to do so. Upon success, enter the final wfi + * which will power down this CPU. + */ + rc = psci_do_cpu_off(target_pwrlvl); + + /* + * The only error cpu_off can return is E_DENIED. So check if that's + * indeed the case. + */ + assert(rc == PSCI_E_DENIED); + + return rc; +} + +int psci_affinity_info(u_register_t target_affinity, + unsigned int lowest_affinity_level) +{ + int target_idx; + + /* We dont support level higher than PSCI_CPU_PWR_LVL */ + if (lowest_affinity_level > PSCI_CPU_PWR_LVL) + return PSCI_E_INVALID_PARAMS; + + /* Calculate the cpu index of the target */ + target_idx = plat_core_pos_by_mpidr(target_affinity); + if (target_idx == -1) + return PSCI_E_INVALID_PARAMS; + + return psci_get_aff_info_state_by_idx(target_idx); +} + +int psci_migrate(u_register_t target_cpu) +{ + int rc; + u_register_t resident_cpu_mpidr; + + rc = psci_spd_migrate_info(&resident_cpu_mpidr); + if (rc != PSCI_TOS_UP_MIG_CAP) + return (rc == PSCI_TOS_NOT_UP_MIG_CAP) ? + PSCI_E_DENIED : PSCI_E_NOT_SUPPORTED; + + /* + * Migrate should only be invoked on the CPU where + * the Secure OS is resident. 
+ */ + if (resident_cpu_mpidr != read_mpidr_el1()) + return PSCI_E_NOT_PRESENT; + + /* Check the validity of the specified target cpu */ + rc = psci_validate_mpidr(target_cpu); + if (rc != PSCI_E_SUCCESS) + return PSCI_E_INVALID_PARAMS; + + assert(psci_spd_pm && psci_spd_pm->svc_migrate); + + rc = psci_spd_pm->svc_migrate(read_mpidr_el1(), target_cpu); + assert(rc == PSCI_E_SUCCESS || rc == PSCI_E_INTERN_FAIL); + + return rc; +} + +int psci_migrate_info_type(void) +{ + u_register_t resident_cpu_mpidr; + + return psci_spd_migrate_info(&resident_cpu_mpidr); +} + +long psci_migrate_info_up_cpu(void) +{ + u_register_t resident_cpu_mpidr; + int rc; + + /* + * Return value of this depends upon what + * psci_spd_migrate_info() returns. + */ + rc = psci_spd_migrate_info(&resident_cpu_mpidr); + if (rc != PSCI_TOS_NOT_UP_MIG_CAP && rc != PSCI_TOS_UP_MIG_CAP) + return PSCI_E_INVALID_PARAMS; + + return resident_cpu_mpidr; +} + +int psci_node_hw_state(u_register_t target_cpu, + unsigned int power_level) +{ + int rc; + + /* Validate target_cpu */ + rc = psci_validate_mpidr(target_cpu); + if (rc != PSCI_E_SUCCESS) + return PSCI_E_INVALID_PARAMS; + + /* Validate power_level against PLAT_MAX_PWR_LVL */ + if (power_level > PLAT_MAX_PWR_LVL) + return PSCI_E_INVALID_PARAMS; + + /* + * Dispatch this call to platform to query power controller, and pass on + * to the caller what it returns + */ + assert(psci_plat_pm_ops->get_node_hw_state); + rc = psci_plat_pm_ops->get_node_hw_state(target_cpu, power_level); + assert((rc >= HW_ON && rc <= HW_STANDBY) || rc == PSCI_E_NOT_SUPPORTED + || rc == PSCI_E_INVALID_PARAMS); + return rc; +} + +int psci_features(unsigned int psci_fid) +{ + unsigned int local_caps = psci_caps; + + /* Check if it is a 64 bit function */ + if (((psci_fid >> FUNCID_CC_SHIFT) & FUNCID_CC_MASK) == SMC_64) + local_caps &= PSCI_CAP_64BIT_MASK; + + /* Check for invalid fid */ + if (!(is_std_svc_call(psci_fid) && is_valid_fast_smc(psci_fid) + && is_psci_fid(psci_fid))) + return PSCI_E_NOT_SUPPORTED; + + + /* Check if the psci fid is supported or not */ + if (!(local_caps & define_psci_cap(psci_fid))) + return PSCI_E_NOT_SUPPORTED; + + /* Format the feature flags */ + if (psci_fid == PSCI_CPU_SUSPEND_AARCH32 || + psci_fid == PSCI_CPU_SUSPEND_AARCH64) { + /* + * The trusted firmware does not support OS Initiated Mode. + */ + return (FF_PSTATE << FF_PSTATE_SHIFT) | + ((!FF_SUPPORTS_OS_INIT_MODE) << FF_MODE_SUPPORT_SHIFT); + } + + /* Return 0 for all other fid's */ + return PSCI_E_SUCCESS; +} + +/******************************************************************************* + * PSCI top level handler for servicing SMCs. 
+ ******************************************************************************/ +u_register_t psci_smc_handler(uint32_t smc_fid, + u_register_t x1, + u_register_t x2, + u_register_t x3, + u_register_t x4, + void *cookie, + void *handle, + u_register_t flags) +{ + if (is_caller_secure(flags)) + return SMC_UNK; + + /* Check the fid against the capabilities */ + if (!(psci_caps & define_psci_cap(smc_fid))) + return SMC_UNK; + + if (((smc_fid >> FUNCID_CC_SHIFT) & FUNCID_CC_MASK) == SMC_32) { + /* 32-bit PSCI function, clear top parameter bits */ + + x1 = (uint32_t)x1; + x2 = (uint32_t)x2; + x3 = (uint32_t)x3; + + switch (smc_fid) { + case PSCI_VERSION: + return psci_version(); + + case PSCI_CPU_OFF: + return psci_cpu_off(); + + case PSCI_CPU_SUSPEND_AARCH32: + return psci_cpu_suspend(x1, x2, x3); + + case PSCI_CPU_ON_AARCH32: + return psci_cpu_on(x1, x2, x3); + + case PSCI_AFFINITY_INFO_AARCH32: + return psci_affinity_info(x1, x2); + + case PSCI_MIG_AARCH32: + return psci_migrate(x1); + + case PSCI_MIG_INFO_TYPE: + return psci_migrate_info_type(); + + case PSCI_MIG_INFO_UP_CPU_AARCH32: + return psci_migrate_info_up_cpu(); + + case PSCI_NODE_HW_STATE_AARCH32: + return psci_node_hw_state(x1, x2); + + case PSCI_SYSTEM_SUSPEND_AARCH32: + return psci_system_suspend(x1, x2); + + case PSCI_SYSTEM_OFF: + psci_system_off(); + /* We should never return from psci_system_off() */ + + case PSCI_SYSTEM_RESET: + psci_system_reset(); + /* We should never return from psci_system_reset() */ + + case PSCI_FEATURES: + return psci_features(x1); + +#if ENABLE_PSCI_STAT + case PSCI_STAT_RESIDENCY_AARCH32: + return psci_stat_residency(x1, x2); + + case PSCI_STAT_COUNT_AARCH32: + return psci_stat_count(x1, x2); +#endif + case PSCI_MEM_PROTECT: + return psci_mem_protect(x1); + + case PSCI_MEM_CHK_RANGE_AARCH32: + return psci_mem_chk_range(x1, x2); + + case PSCI_SYSTEM_RESET2_AARCH32: + /* We should never return from psci_system_reset2() */ + return psci_system_reset2(x1, x2); + + default: + break; + } + } else { + /* 64-bit PSCI function */ + + switch (smc_fid) { + case PSCI_CPU_SUSPEND_AARCH64: + return psci_cpu_suspend(x1, x2, x3); + + case PSCI_CPU_ON_AARCH64: + return psci_cpu_on(x1, x2, x3); + + case PSCI_AFFINITY_INFO_AARCH64: + return psci_affinity_info(x1, x2); + + case PSCI_MIG_AARCH64: + return psci_migrate(x1); + + case PSCI_MIG_INFO_UP_CPU_AARCH64: + return psci_migrate_info_up_cpu(); + + case PSCI_NODE_HW_STATE_AARCH64: + return psci_node_hw_state(x1, x2); + + case PSCI_SYSTEM_SUSPEND_AARCH64: + return psci_system_suspend(x1, x2); + +#if ENABLE_PSCI_STAT + case PSCI_STAT_RESIDENCY_AARCH64: + return psci_stat_residency(x1, x2); + + case PSCI_STAT_COUNT_AARCH64: + return psci_stat_count(x1, x2); +#endif + + case PSCI_MEM_CHK_RANGE_AARCH64: + return psci_mem_chk_range(x1, x2); + + case PSCI_SYSTEM_RESET2_AARCH64: + /* We should never return from psci_system_reset2() */ + return psci_system_reset2(x1, x2); + + default: + break; + } + } + + WARN("Unimplemented PSCI Call: 0x%x \n", smc_fid); + return SMC_UNK; +} diff --git a/lib/psci/psci_mem_protect.c b/lib/psci/psci_mem_protect.c new file mode 100644 index 00000000..fca84e90 --- /dev/null +++ b/lib/psci/psci_mem_protect.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. 
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <utils.h>
+#include "psci_private.h"
+
+int psci_mem_protect(unsigned int enable)
+{
+	int val;
+
+	assert(psci_plat_pm_ops->read_mem_protect);
+	assert(psci_plat_pm_ops->write_mem_protect);
+
+	if (psci_plat_pm_ops->read_mem_protect(&val) < 0)
+		return PSCI_E_NOT_SUPPORTED;
+	if (psci_plat_pm_ops->write_mem_protect(enable) < 0)
+		return PSCI_E_NOT_SUPPORTED;
+
+	return val != 0;
+}
+
+int psci_mem_chk_range(uintptr_t base, u_register_t length)
+{
+	int ret;
+
+	assert(psci_plat_pm_ops->mem_protect_chk);
+
+	if (length == 0 || check_uptr_overflow(base, length - 1))
+		return PSCI_E_DENIED;
+
+	ret = psci_plat_pm_ops->mem_protect_chk(base, length);
+	return (ret < 0) ? PSCI_E_DENIED : PSCI_E_SUCCESS;
+}
diff --git a/lib/psci/psci_off.c b/lib/psci/psci_off.c
new file mode 100644
index 00000000..231deea2
--- /dev/null
+++ b/lib/psci/psci_off.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <arch_helpers.h>
+#include <assert.h>
+#include <debug.h>
+#include <platform.h>
+#include <pmf.h>
+#include <runtime_instr.h>
+#include <string.h>
+#include "psci_private.h"
+
+/******************************************************************************
+ * Construct the psci_power_state to request power OFF at all power levels.
+ ******************************************************************************/
+static void psci_set_power_off_state(psci_power_state_t *state_info)
+{
+	unsigned int lvl;
+
+	for (lvl = PSCI_CPU_PWR_LVL; lvl <= PLAT_MAX_PWR_LVL; lvl++)
+		state_info->pwr_domain_state[lvl] = PLAT_MAX_OFF_STATE;
+}
+
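+/*
+ * Illustrative sketch (not part of the original change): after the helper
+ * above runs, every level carries the deepest possible request; state
+ * coordination may later demote the higher levels to shallower states. The
+ * example_* helper is hypothetical.
+ */
+static void example_show_off_request(void)
+{
+	psci_power_state_t req;
+
+	psci_set_power_off_state(&req);
+	assert(req.pwr_domain_state[PSCI_CPU_PWR_LVL] == PLAT_MAX_OFF_STATE);
+	assert(req.pwr_domain_state[PLAT_MAX_PWR_LVL] == PLAT_MAX_OFF_STATE);
+}
+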
+/******************************************************************************
+ * Top level handler which is called when a cpu wants to power itself down.
+ * It's assumed that along with turning the cpu power domain off, power
+ * domains at higher levels will be turned off as far as possible. It finds
+ * the highest level where a domain has to be powered off by traversing the
+ * node information and then performs generic, architectural, platform setup
+ * and state management required to turn OFF that power domain and domains
+ * below it. e.g. For a cpu that's to be powered OFF, it could mean programming
+ * the power controller, whereas for a cluster that's to be powered off, it
+ * will call the platform specific code which will disable coherency at the
+ * interconnect level if the cpu is the last in the cluster, and also
+ * program the power controller.
+ ******************************************************************************/
+int psci_do_cpu_off(unsigned int end_pwrlvl)
+{
+	int rc = PSCI_E_SUCCESS, idx = plat_my_core_pos();
+	psci_power_state_t state_info;
+
+	/*
+	 * This function must only be called on platforms where the
+	 * CPU_OFF platform hooks have been implemented.
+	 */
+	assert(psci_plat_pm_ops->pwr_domain_off);
+
+	/* Construct the psci_power_state for CPU_OFF */
+	psci_set_power_off_state(&state_info);
+
+	/*
+	 * This function acquires the lock corresponding to each power
+	 * level so that by the time all locks are taken, a snapshot of the
+	 * system topology is held and state management can be done safely.
+	 */
+	psci_acquire_pwr_domain_locks(end_pwrlvl,
+				      idx);
+
+	/*
+	 * Call the cpu off handler registered by the Secure Payload Dispatcher
+	 * to let it do any bookkeeping. Assume that the SPD always reports an
+	 * E_DENIED error if the SP refuses to power down.
+	 */
+	if (psci_spd_pm && psci_spd_pm->svc_off) {
+		rc = psci_spd_pm->svc_off(0);
+		if (rc)
+			goto exit;
+	}
+
+	/*
+	 * This function is passed the requested state info and
+	 * it returns the negotiated state info for each power level up to
+	 * the end level specified.
+	 */
+	psci_do_state_coordination(end_pwrlvl, &state_info);
+
+#if ENABLE_PSCI_STAT
+	/* Update the last cpu for each level up to end_pwrlvl */
+	psci_stats_update_pwr_down(end_pwrlvl, &state_info);
+#endif
+
+#if ENABLE_RUNTIME_INSTRUMENTATION
+
+	/*
+	 * Flush cache line so that even if CPU power down happens
+	 * the timestamp update is reflected in memory.
+	 */
+	PMF_CAPTURE_TIMESTAMP(rt_instr_svc,
+		RT_INSTR_ENTER_CFLUSH,
+		PMF_CACHE_MAINT);
+#endif
+
+	/*
+	 * Arch. management. Initiate power down sequence.
+	 */
+	psci_do_pwrdown_sequence(psci_find_max_off_lvl(&state_info));
+
+#if ENABLE_RUNTIME_INSTRUMENTATION
+	PMF_CAPTURE_TIMESTAMP(rt_instr_svc,
+		RT_INSTR_EXIT_CFLUSH,
+		PMF_NO_CACHE_MAINT);
+#endif
+
+	/*
+	 * Plat. management: Perform platform specific actions to turn this
+	 * cpu off e.g. exit cpu coherency, program the power controller etc.
+	 */
+	psci_plat_pm_ops->pwr_domain_off(&state_info);
+
+#if ENABLE_PSCI_STAT
+	plat_psci_stat_accounting_start(&state_info);
+#endif
+
+exit:
+	/*
+	 * Release the locks corresponding to each power level in the
+	 * reverse order to which they were acquired.
+	 */
+	psci_release_pwr_domain_locks(end_pwrlvl,
+				      idx);
+
+	/*
+	 * Check if all actions needed to safely power down this cpu have
+	 * successfully completed.
+	 */
+	if (rc == PSCI_E_SUCCESS) {
+		/*
+		 * Set the affinity info state to OFF. When caches are disabled,
+		 * this writes directly to main memory, so cache maintenance is
+		 * required to ensure that later cached reads of aff_info_state
+		 * return AFF_STATE_OFF. A dsbish() ensures ordering of the
+		 * update to the affinity info state prior to cache line
+		 * invalidation.
+		 */
+		psci_flush_cpu_data(psci_svc_cpu_data.aff_info_state);
+		psci_set_aff_info_state(AFF_STATE_OFF);
+		psci_dsbish();
+		psci_inv_cpu_data(psci_svc_cpu_data.aff_info_state);
+
+#if ENABLE_RUNTIME_INSTRUMENTATION
+
+		/*
+		 * Update the timestamp with cache off. We assume this
+		 * timestamp can only be read from the current CPU and the
+		 * timestamp cache line will be flushed before return to
+		 * normal world on wakeup.
+		 */
+		PMF_CAPTURE_TIMESTAMP(rt_instr_svc,
+			RT_INSTR_ENTER_HW_LOW_PWR,
+			PMF_NO_CACHE_MAINT);
+#endif
+
+		if (psci_plat_pm_ops->pwr_domain_pwr_down_wfi) {
+			/* This function must not return */
+			psci_plat_pm_ops->pwr_domain_pwr_down_wfi(&state_info);
+		} else {
+			/*
+			 * Enter a wfi loop which will allow the power
+			 * controller to physically power down this cpu.
+			 */
+			psci_power_down_wfi();
+		}
+	}
+
+	return rc;
+}
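+
+/*
+ * Illustrative sketch (not part of the original change): a platform
+ * pwr_domain_off hook might perform the actions described above. The
+ * plat_example_* helpers are hypothetical and level 1 is assumed to be the
+ * cluster level.
+ */
+static void plat_example_gic_cpuif_disable(void);
+static void plat_example_cluster_coherency_exit(void);
+
+static void plat_example_pwr_domain_off(const psci_power_state_t *target_state)
+{
+	/* Quiesce interrupt delivery to this core. */
+	plat_example_gic_cpuif_disable();
+
+	/* If the whole cluster is going down, exit interconnect coherency. */
+	if (is_local_state_off(target_state->pwr_domain_state[1]))
+		plat_example_cluster_coherency_exit();
+}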
diff --git a/lib/psci/psci_on.c b/lib/psci/psci_on.c
new file mode 100644
index 00000000..53b044ec
--- /dev/null
+++ b/lib/psci/psci_on.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <arch_helpers.h>
+#include <assert.h>
+#include <bl_common.h>
+#include <context_mgmt.h>
+#include <debug.h>
+#include <platform.h>
+#include <pubsub_events.h>
+#include <stddef.h>
+#include "psci_private.h"
+
+/*******************************************************************************
+ * This function checks whether a cpu which has been requested to be turned on
+ * is OFF to begin with.
+ ******************************************************************************/
+static int cpu_on_validate_state(aff_info_state_t aff_state)
+{
+	if (aff_state == AFF_STATE_ON)
+		return PSCI_E_ALREADY_ON;
+
+	if (aff_state == AFF_STATE_ON_PENDING)
+		return PSCI_E_ON_PENDING;
+
+	assert(aff_state == AFF_STATE_OFF);
+	return PSCI_E_SUCCESS;
+}
+
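+/*
+ * Illustrative sketch (not part of the original change): the helper above
+ * encodes the CPU_ON view of the aff_info state machine. The example_* helper
+ * is hypothetical.
+ */
+static void example_check_on_state_machine(void)
+{
+	assert(cpu_on_validate_state(AFF_STATE_OFF) == PSCI_E_SUCCESS);
+	assert(cpu_on_validate_state(AFF_STATE_ON_PENDING) == PSCI_E_ON_PENDING);
+	assert(cpu_on_validate_state(AFF_STATE_ON) == PSCI_E_ALREADY_ON);
+}
+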
+ */ + psci_set_aff_info_state_by_idx(target_idx, AFF_STATE_ON_PENDING); + flush_cpu_data_by_index(target_idx, psci_svc_cpu_data.aff_info_state); + + /* + * The cache line invalidation by the target CPU after setting the + * state to OFF (see psci_do_cpu_off()), could cause the update to + * aff_info_state to be invalidated. Retry the update if the target + * CPU aff_info_state is not ON_PENDING. + */ + target_aff_state = psci_get_aff_info_state_by_idx(target_idx); + if (target_aff_state != AFF_STATE_ON_PENDING) { + assert(target_aff_state == AFF_STATE_OFF); + psci_set_aff_info_state_by_idx(target_idx, AFF_STATE_ON_PENDING); + flush_cpu_data_by_index(target_idx, psci_svc_cpu_data.aff_info_state); + + assert(psci_get_aff_info_state_by_idx(target_idx) == AFF_STATE_ON_PENDING); + } + + /* + * Perform generic, architecture and platform specific handling. + */ + /* + * Plat. management: Give the platform the current state + * of the target cpu to allow it to perform the necessary + * steps to power on. + */ + rc = psci_plat_pm_ops->pwr_domain_on(target_cpu); + assert(rc == PSCI_E_SUCCESS || rc == PSCI_E_INTERN_FAIL); + + if (rc == PSCI_E_SUCCESS) + /* Store the re-entry information for the non-secure world. */ + cm_init_context_by_index(target_idx, ep); + else { + /* Restore the state on error. */ + psci_set_aff_info_state_by_idx(target_idx, AFF_STATE_OFF); + flush_cpu_data_by_index(target_idx, psci_svc_cpu_data.aff_info_state); + } + +exit: + psci_spin_unlock_cpu(target_idx); + return rc; +} + +/******************************************************************************* + * The following function finish an earlier power on request. They + * are called by the common finisher routine in psci_common.c. The `state_info` + * is the psci_power_state from which this CPU has woken up from. + ******************************************************************************/ +void psci_cpu_on_finish(unsigned int cpu_idx, + psci_power_state_t *state_info) +{ + /* + * Plat. management: Perform the platform specific actions + * for this cpu e.g. enabling the gic or zeroing the mailbox + * register. The actual state of this cpu has already been + * changed. + */ + psci_plat_pm_ops->pwr_domain_on_finish(state_info); + +#if !(HW_ASSISTED_COHERENCY || WARMBOOT_ENABLE_DCACHE_EARLY) + /* + * Arch. management: Enable data cache and manage stack memory + */ + psci_do_pwrup_cache_maintenance(); +#endif + + /* + * All the platform specific actions for turning this cpu + * on have completed. Perform enough arch.initialization + * to run in the non-secure address space. + */ + psci_arch_setup(); + + /* + * Lock the CPU spin lock to make sure that the context initialization + * is done. Since the lock is only used in this function to create + * a synchronization point with cpu_on_start(), it can be released + * immediately. + */ + psci_spin_lock_cpu(cpu_idx); + psci_spin_unlock_cpu(cpu_idx); + + /* Ensure we have been explicitly woken up by another cpu */ + assert(psci_get_aff_info_state() == AFF_STATE_ON_PENDING); + + /* + * Call the cpu on finish handler registered by the Secure Payload + * Dispatcher to let it do any bookeeping. 
+/*******************************************************************************
+ * This function finishes an earlier power on request. It is called by the
+ * common finisher routine in psci_common.c. The `state_info` is the
+ * psci_power_state from which this CPU has woken up.
+ ******************************************************************************/
+void psci_cpu_on_finish(unsigned int cpu_idx,
+			psci_power_state_t *state_info)
+{
+	/*
+	 * Plat. management: Perform the platform specific actions
+	 * for this cpu e.g. enabling the gic or zeroing the mailbox
+	 * register. The actual state of this cpu has already been
+	 * changed.
+	 */
+	psci_plat_pm_ops->pwr_domain_on_finish(state_info);
+
+#if !(HW_ASSISTED_COHERENCY || WARMBOOT_ENABLE_DCACHE_EARLY)
+	/*
+	 * Arch. management: Enable data cache and manage stack memory
+	 */
+	psci_do_pwrup_cache_maintenance();
+#endif
+
+	/*
+	 * All the platform specific actions for turning this cpu
+	 * on have completed. Perform enough architectural initialization
+	 * to run in the non-secure address space.
+	 */
+	psci_arch_setup();
+
+	/*
+	 * Lock the CPU spin lock to make sure that the context initialization
+	 * is done. Since the lock is only used in this function to create
+	 * a synchronization point with cpu_on_start(), it can be released
+	 * immediately.
+	 */
+	psci_spin_lock_cpu(cpu_idx);
+	psci_spin_unlock_cpu(cpu_idx);
+
+	/* Ensure we have been explicitly woken up by another cpu */
+	assert(psci_get_aff_info_state() == AFF_STATE_ON_PENDING);
+
+	/*
+	 * Call the cpu on finish handler registered by the Secure Payload
+	 * Dispatcher to let it do any bookkeeping. If the handler encounters
+	 * an error, it is expected to assert within.
+	 */
+	if (psci_spd_pm && psci_spd_pm->svc_on_finish)
+		psci_spd_pm->svc_on_finish(0);
+
+	PUBLISH_EVENT(psci_cpu_on_finish);
+
+	/* Populate the mpidr field within the cpu node array */
+	/* This needs to be done only once */
+	psci_cpu_pd_nodes[cpu_idx].mpidr = read_mpidr() & MPIDR_AFFINITY_MASK;
+
+	/*
+	 * Generic management: Now we just need to retrieve the
+	 * information that we had stashed away during the cpu_on
+	 * call to set this cpu on its way.
+	 */
+	cm_prepare_el3_exit(NON_SECURE);
+}
diff --git a/lib/psci/psci_private.h b/lib/psci/psci_private.h
new file mode 100644
index 00000000..504fb9e4
--- /dev/null
+++ b/lib/psci/psci_private.h
@@ -0,0 +1,279 @@
+/*
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef __PSCI_PRIVATE_H__
+#define __PSCI_PRIVATE_H__
+
+#include <arch.h>
+#include <bakery_lock.h>
+#include <bl_common.h>
+#include <cpu_data.h>
+#include <psci.h>
+#include <spinlock.h>
+
+#if HW_ASSISTED_COHERENCY
+
+/*
+ * On systems with hardware-assisted coherency, make PSCI cache operations
+ * no-ops, as PSCI participants are cache-coherent, and there's no need for
+ * explicit cache maintenance operations or barriers to coordinate their state.
+ */
+#define psci_flush_dcache_range(addr, size)
+#define psci_flush_cpu_data(member)
+#define psci_inv_cpu_data(member)
+
+#define psci_dsbish()
+
+/*
+ * On systems where participant CPUs are cache-coherent, we can use spinlocks
+ * instead of bakery locks.
+ */
+#define DEFINE_PSCI_LOCK(_name) spinlock_t _name
+#define DECLARE_PSCI_LOCK(_name) extern DEFINE_PSCI_LOCK(_name)
+
+#define psci_lock_get(non_cpu_pd_node) \
+	spin_lock(&psci_locks[(non_cpu_pd_node)->lock_index])
+#define psci_lock_release(non_cpu_pd_node) \
+	spin_unlock(&psci_locks[(non_cpu_pd_node)->lock_index])
+
+#else
+
+/*
+ * If not all PSCI participants are cache-coherent, perform cache maintenance
+ * and issue barriers wherever required to coordinate state.
+ */
+#define psci_flush_dcache_range(addr, size) flush_dcache_range(addr, size)
+#define psci_flush_cpu_data(member) flush_cpu_data(member)
+#define psci_inv_cpu_data(member) inv_cpu_data(member)
+
+#define psci_dsbish() dsbish()
+
+/*
+ * Use bakery locks for state coordination as not all PSCI participants are
+ * cache coherent.
+ */
+#define DEFINE_PSCI_LOCK(_name) DEFINE_BAKERY_LOCK(_name)
+#define DECLARE_PSCI_LOCK(_name) DECLARE_BAKERY_LOCK(_name)
+
+#define psci_lock_get(non_cpu_pd_node) \
+	bakery_lock_get(&psci_locks[(non_cpu_pd_node)->lock_index])
+#define psci_lock_release(non_cpu_pd_node) \
+	bakery_lock_release(&psci_locks[(non_cpu_pd_node)->lock_index])
+
+#endif
+
+#define psci_lock_init(non_cpu_pd_node, idx) \
+	((non_cpu_pd_node)[(idx)].lock_index = (idx))
+
+/*
+ * The PSCI capabilities which are provided by the generic code and do not
+ * depend on the platform or SPD capabilities.
+ */
+#define PSCI_GENERIC_CAP	\
+	(define_psci_cap(PSCI_VERSION) |		\
+	define_psci_cap(PSCI_AFFINITY_INFO_AARCH64) |	\
+	define_psci_cap(PSCI_FEATURES))
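As a usage illustration of the lock abstraction above, here is a minimal sketch (not part of the patch) of serialising an update to a non-CPU power domain's local_state; in the actual implementation, psci_acquire_pwr_domain_locks() performs this kind of locking across all affected levels. The function name myplat_set_cluster_state is hypothetical; non_cpu_pd_node_t and psci_non_cpu_pd_nodes are defined further down in this header.

    /* Illustrative only: serialise an update to a non-CPU power domain node. */
    static void myplat_set_cluster_state(unsigned int node_idx,
					 plat_local_state_t state)
    {
    	non_cpu_pd_node_t *node = &psci_non_cpu_pd_nodes[node_idx];

    	/* Expands to a spinlock or a bakery lock depending on coherency. */
    	psci_lock_get(node);
    	node->local_state = state;
    	/* No-op when HW_ASSISTED_COHERENCY is enabled. */
    	psci_flush_dcache_range((uintptr_t)&node->local_state,
    				sizeof(node->local_state));
    	psci_lock_release(node);
    }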
+/*
+ * The PSCI capabilities mask for 64-bit functions.
+ */
+#define PSCI_CAP_64BIT_MASK	\
+	(define_psci_cap(PSCI_CPU_SUSPEND_AARCH64) |	\
+	define_psci_cap(PSCI_CPU_ON_AARCH64) |		\
+	define_psci_cap(PSCI_AFFINITY_INFO_AARCH64) |	\
+	define_psci_cap(PSCI_MIG_AARCH64) |		\
+	define_psci_cap(PSCI_MIG_INFO_UP_CPU_AARCH64) |	\
+	define_psci_cap(PSCI_NODE_HW_STATE_AARCH64) |	\
+	define_psci_cap(PSCI_SYSTEM_SUSPEND_AARCH64) |	\
+	define_psci_cap(PSCI_STAT_RESIDENCY_AARCH64) |	\
+	define_psci_cap(PSCI_STAT_COUNT_AARCH64) |	\
+	define_psci_cap(PSCI_SYSTEM_RESET2_AARCH64) |	\
+	define_psci_cap(PSCI_MEM_CHK_RANGE_AARCH64))
+
+/*
+ * Helper macros to get/set the fields of PSCI per-cpu data.
+ */
+#define psci_set_aff_info_state(aff_state) \
+		set_cpu_data(psci_svc_cpu_data.aff_info_state, aff_state)
+#define psci_get_aff_info_state() \
+		get_cpu_data(psci_svc_cpu_data.aff_info_state)
+#define psci_get_aff_info_state_by_idx(idx) \
+		get_cpu_data_by_index(idx, psci_svc_cpu_data.aff_info_state)
+#define psci_set_aff_info_state_by_idx(idx, aff_state) \
+		set_cpu_data_by_index(idx, psci_svc_cpu_data.aff_info_state,\
+					aff_state)
+#define psci_get_suspend_pwrlvl() \
+		get_cpu_data(psci_svc_cpu_data.target_pwrlvl)
+#define psci_set_suspend_pwrlvl(target_lvl) \
+		set_cpu_data(psci_svc_cpu_data.target_pwrlvl, target_lvl)
+#define psci_set_cpu_local_state(state) \
+		set_cpu_data(psci_svc_cpu_data.local_state, state)
+#define psci_get_cpu_local_state() \
+		get_cpu_data(psci_svc_cpu_data.local_state)
+#define psci_get_cpu_local_state_by_idx(idx) \
+		get_cpu_data_by_index(idx, psci_svc_cpu_data.local_state)
+
+/*
+ * Helper macros for the CPU level spinlocks
+ */
+#define psci_spin_lock_cpu(idx) spin_lock(&psci_cpu_pd_nodes[idx].cpu_lock)
+#define psci_spin_unlock_cpu(idx) spin_unlock(&psci_cpu_pd_nodes[idx].cpu_lock)
+
+/* Helper macro to identify a CPU standby request in PSCI Suspend call */
+#define is_cpu_standby_req(is_power_down_state, retn_lvl) \
+		(((!(is_power_down_state)) && ((retn_lvl) == 0)) ? 1 : 0)
+
+/*******************************************************************************
+ * The following two data structures implement the power domain tree. The tree
+ * is used to track the state of all the nodes i.e. power domain instances
+ * described by the platform. The tree consists of nodes that describe CPU power
+ * domains i.e. leaf nodes and all other power domains which are parents of a
+ * CPU power domain i.e. non-leaf nodes.
+ ******************************************************************************/
+typedef struct non_cpu_pwr_domain_node {
+	/*
+	 * Index of the first CPU power domain node level 0 which has this node
+	 * as its parent.
+	 */
+	unsigned int cpu_start_idx;
+
+	/*
+	 * Number of CPU power domains which are siblings of the domain indexed
+	 * by 'cpu_start_idx' i.e. all the domains in the range 'cpu_start_idx
+	 * -> cpu_start_idx + ncpus' have this node as their parent.
+	 */
+	unsigned int ncpus;
+
+	/*
+	 * Index of the parent power domain node.
+	 * TODO: Figure out whether using a pointer is more efficient.
+	 */
+	unsigned int parent_node;
+
+	plat_local_state_t local_state;
+
+	unsigned char level;
+
+	/* For indexing the psci_lock array */
+	unsigned char lock_index;
+} non_cpu_pd_node_t;
+
+typedef struct cpu_pwr_domain_node {
+	u_register_t mpidr;
+
+	/*
+	 * Index of the parent power domain node.
+	 * TODO: Figure out whether using a pointer is more efficient.
+	 */
+	unsigned int parent_node;
+
+	/*
+	 * A CPU power domain does not require state coordination like its
+	 * parent power domains.
Hence this node does not include a bakery + * lock. A spinlock is required by the CPU_ON handler to prevent a race + * when multiple CPUs try to turn ON the same target CPU. + */ + spinlock_t cpu_lock; +} cpu_pd_node_t; + +/******************************************************************************* + * Data prototypes + ******************************************************************************/ +extern const plat_psci_ops_t *psci_plat_pm_ops; +extern non_cpu_pd_node_t psci_non_cpu_pd_nodes[PSCI_NUM_NON_CPU_PWR_DOMAINS]; +extern cpu_pd_node_t psci_cpu_pd_nodes[PLATFORM_CORE_COUNT]; +extern unsigned int psci_caps; + +/* One lock is required per non-CPU power domain node */ +DECLARE_PSCI_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]); + +/******************************************************************************* + * SPD's power management hooks registered with PSCI + ******************************************************************************/ +extern const spd_pm_ops_t *psci_spd_pm; + +/******************************************************************************* + * Function prototypes + ******************************************************************************/ +/* Private exported functions from psci_common.c */ +int psci_validate_power_state(unsigned int power_state, + psci_power_state_t *state_info); +void psci_query_sys_suspend_pwrstate(psci_power_state_t *state_info); +int psci_validate_mpidr(u_register_t mpidr); +void psci_init_req_local_pwr_states(void); +void psci_get_target_local_pwr_states(unsigned int end_pwrlvl, + psci_power_state_t *target_state); +int psci_validate_entry_point(entry_point_info_t *ep, + uintptr_t entrypoint, u_register_t context_id); +void psci_get_parent_pwr_domain_nodes(unsigned int cpu_idx, + unsigned int end_lvl, + unsigned int node_index[]); +void psci_do_state_coordination(unsigned int end_pwrlvl, + psci_power_state_t *state_info); +void psci_acquire_pwr_domain_locks(unsigned int end_pwrlvl, + unsigned int cpu_idx); +void psci_release_pwr_domain_locks(unsigned int end_pwrlvl, + unsigned int cpu_idx); +int psci_validate_suspend_req(const psci_power_state_t *state_info, + unsigned int is_power_down_state_req); +unsigned int psci_find_max_off_lvl(const psci_power_state_t *state_info); +unsigned int psci_find_target_suspend_lvl(const psci_power_state_t *state_info); +void psci_set_pwr_domains_to_run(unsigned int end_pwrlvl); +void psci_print_power_domain_map(void); +unsigned int psci_is_last_on_cpu(void); +int psci_spd_migrate_info(u_register_t *mpidr); +void psci_do_pwrdown_sequence(unsigned int power_level); + +/* + * CPU power down is directly called only when HW_ASSISTED_COHERENCY is + * available. Otherwise, this needs post-call stack maintenance, which is + * handled in assembly. 
+ */
+void prepare_cpu_pwr_dwn(unsigned int power_level);
+
+/* Private exported functions from psci_on.c */
+int psci_cpu_on_start(u_register_t target_cpu,
+		      entry_point_info_t *ep);
+
+void psci_cpu_on_finish(unsigned int cpu_idx,
+			psci_power_state_t *state_info);
+
+/* Private exported functions from psci_off.c */
+int psci_do_cpu_off(unsigned int end_pwrlvl);
+
+/* Private exported functions from psci_suspend.c */
+void psci_cpu_suspend_start(entry_point_info_t *ep,
+			unsigned int end_pwrlvl,
+			psci_power_state_t *state_info,
+			unsigned int is_power_down_state_req);
+
+void psci_cpu_suspend_finish(unsigned int cpu_idx,
+			psci_power_state_t *state_info);
+
+/* Private exported functions from psci_helpers.S */
+void psci_do_pwrdown_cache_maintenance(unsigned int pwr_level);
+void psci_do_pwrup_cache_maintenance(void);
+
+/* Private exported functions from psci_system_off.c */
+void __dead2 psci_system_off(void);
+void __dead2 psci_system_reset(void);
+int psci_system_reset2(uint32_t reset_type, u_register_t cookie);
+
+/* Private exported functions from psci_stat.c */
+void psci_stats_update_pwr_down(unsigned int end_pwrlvl,
+			const psci_power_state_t *state_info);
+void psci_stats_update_pwr_up(unsigned int end_pwrlvl,
+			const psci_power_state_t *state_info);
+u_register_t psci_stat_residency(u_register_t target_cpu,
+			unsigned int power_state);
+u_register_t psci_stat_count(u_register_t target_cpu,
+			unsigned int power_state);
+
+/* Private exported functions from psci_mem_protect.c */
+int psci_mem_protect(unsigned int enable);
+int psci_mem_chk_range(uintptr_t base, u_register_t length);
+
+#endif /* __PSCI_PRIVATE_H__ */
diff --git a/lib/psci/psci_setup.c b/lib/psci/psci_setup.c
new file mode 100644
index 00000000..a841ddab
--- /dev/null
+++ b/lib/psci/psci_setup.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <arch_helpers.h>
+#include <assert.h>
+#include <bl_common.h>
+#include <context.h>
+#include <context_mgmt.h>
+#include <errata_report.h>
+#include <platform.h>
+#include <stddef.h>
+#include "psci_private.h"
+
+/*******************************************************************************
+ * Per cpu non-secure contexts used to program the architectural state prior
+ * to the return to the normal world.
+ * TODO: Use the memory allocator to set aside memory for the contexts instead
+ * of relying on platform defined constants.
+ ******************************************************************************/
+static cpu_context_t psci_ns_context[PLATFORM_CORE_COUNT];
+
+/******************************************************************************
+ * Define the psci capability variable.
+ *****************************************************************************/
+unsigned int psci_caps;
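Once psci_setup() (further below) has populated psci_caps, a dispatcher can test individual capabilities with the same define_psci_cap() helper used throughout this patch. The wrapper below is a hypothetical illustration, not part of the patch; the real SMC dispatch lives elsewhere in the PSCI service code.

    /* Illustrative only: reject a CPU_ON SMC if the capability was never set. */
    static int myplat_handle_cpu_on_smc(u_register_t target_cpu,
					entry_point_info_t *ep)
    {
    	if (!(psci_caps & define_psci_cap(PSCI_CPU_ON_AARCH64)))
    		return PSCI_E_NOT_SUPPORTED;

    	return psci_cpu_on_start(target_cpu, ep);
    }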
+/*******************************************************************************
+ * Function which initializes the 'psci_non_cpu_pd_nodes' or the
+ * 'psci_cpu_pd_nodes' corresponding to the power level.
+ ******************************************************************************/
+static void psci_init_pwr_domain_node(unsigned int node_idx,
+					unsigned int parent_idx,
+					unsigned int level)
+{
+	if (level > PSCI_CPU_PWR_LVL) {
+		psci_non_cpu_pd_nodes[node_idx].level = level;
+		psci_lock_init(psci_non_cpu_pd_nodes, node_idx);
+		psci_non_cpu_pd_nodes[node_idx].parent_node = parent_idx;
+		psci_non_cpu_pd_nodes[node_idx].local_state =
+							PLAT_MAX_OFF_STATE;
+	} else {
+		psci_cpu_data_t *svc_cpu_data;
+
+		psci_cpu_pd_nodes[node_idx].parent_node = parent_idx;
+
+		/* Initialize with an invalid mpidr */
+		psci_cpu_pd_nodes[node_idx].mpidr = PSCI_INVALID_MPIDR;
+
+		svc_cpu_data =
+			&(_cpu_data_by_index(node_idx)->psci_svc_cpu_data);
+
+		/* Set the Affinity Info for the cores as OFF */
+		svc_cpu_data->aff_info_state = AFF_STATE_OFF;
+
+		/* Invalidate the suspend level for the cpu */
+		svc_cpu_data->target_pwrlvl = PSCI_INVALID_PWR_LVL;
+
+		/* Set the power state to OFF state */
+		svc_cpu_data->local_state = PLAT_MAX_OFF_STATE;
+
+		psci_flush_dcache_range((uintptr_t)svc_cpu_data,
+						 sizeof(*svc_cpu_data));
+
+		cm_set_context_by_index(node_idx,
+					(void *) &psci_ns_context[node_idx],
+					NON_SECURE);
+	}
+}
+
+/*******************************************************************************
+ * This function updates the cpu_start_idx and ncpus fields for each of the
+ * nodes in psci_non_cpu_pd_nodes[]. It does so by comparing the parent nodes
+ * of each of the CPUs and checking whether they match the parent of the
+ * previous CPU. The basic assumption for this to work is that children of the
+ * same parent are allocated adjacent indices. The platform should ensure this
+ * through proper mapping of the CPUs to indices via plat_core_pos_by_mpidr()
+ * and plat_my_core_pos() APIs.
+ *******************************************************************************/
+static void psci_update_pwrlvl_limits(void)
+{
+	int j;
+	unsigned int nodes_idx[PLAT_MAX_PWR_LVL] = {0};
+	unsigned int temp_index[PLAT_MAX_PWR_LVL], cpu_idx;
+
+	for (cpu_idx = 0; cpu_idx < PLATFORM_CORE_COUNT; cpu_idx++) {
+		psci_get_parent_pwr_domain_nodes(cpu_idx,
+						 PLAT_MAX_PWR_LVL,
+						 temp_index);
+		for (j = PLAT_MAX_PWR_LVL - 1; j >= 0; j--) {
+			if (temp_index[j] != nodes_idx[j]) {
+				nodes_idx[j] = temp_index[j];
+				psci_non_cpu_pd_nodes[nodes_idx[j]].cpu_start_idx
+					= cpu_idx;
+			}
+			psci_non_cpu_pd_nodes[nodes_idx[j]].ncpus++;
+		}
+	}
+}
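To make the descriptor format concrete before the core routine below: for the two-cluster, two-CPUs-per-cluster system described in the psci_setup() comment further down, a platform's plat_get_power_domain_tree_desc() would return something like the following. This is a sketch, assuming PLAT_MAX_PWR_LVL == 2 and the hypothetical name myplat_power_domain_tree_desc.

    /*
     * Illustrative only: breadth-first tree descriptor for
     *   1 system node -> 2 clusters -> 2 CPUs per cluster.
     */
    static const unsigned char myplat_power_domain_tree_desc[] = {
    	1,	/* Number of root (system-level) power domains */
    	2,	/* Number of children (clusters) under the system node */
    	2,	/* Number of CPUs under cluster 0 */
    	2	/* Number of CPUs under cluster 1 */
    };

    const unsigned char *plat_get_power_domain_tree_desc(void)
    {
    	return myplat_power_domain_tree_desc;
    }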
+/*******************************************************************************
+ * Core routine to populate the power domain tree. The tree descriptor passed
+ * by the platform is traversed breadth-first and the first entry in the map
+ * informs the number of root power domains. The parent nodes of the root
+ * nodes will point to an invalid entry (-1).
+ ******************************************************************************/
+static void populate_power_domain_tree(const unsigned char *topology)
+{
+	unsigned int i, j = 0, num_nodes_at_lvl = 1, num_nodes_at_next_lvl;
+	unsigned int node_index = 0, parent_node_index = 0, num_children;
+	int level = PLAT_MAX_PWR_LVL;
+
+	/*
+	 * For each level the inputs are:
+	 * - number of nodes at this level in plat_array i.e. num_nodes_at_lvl
+	 *   This is the sum of values of nodes at the parent level.
+	 * - Index of first entry at this level in the plat_array i.e.
+	 *   parent_node_index.
+	 * - Index of first free entry in psci_non_cpu_pd_nodes[] or
+	 *   psci_cpu_pd_nodes[] i.e. node_index depending upon the level.
+	 */
+	while (level >= PSCI_CPU_PWR_LVL) {
+		num_nodes_at_next_lvl = 0;
+		/*
+		 * For each entry (parent node) at this level in the plat_array:
+		 * - Find the number of children
+		 * - Allocate a node in a power domain array for each child
+		 * - Set the parent of the child to the parent_node_index - 1
+		 * - Increment parent_node_index to point to the next parent
+		 * - Accumulate the number of children at next level.
+		 */
+		for (i = 0; i < num_nodes_at_lvl; i++) {
+			assert(parent_node_index <=
+					PSCI_NUM_NON_CPU_PWR_DOMAINS);
+			num_children = topology[parent_node_index];
+
+			for (j = node_index;
+				j < node_index + num_children; j++)
+				psci_init_pwr_domain_node(j,
+							  parent_node_index - 1,
+							  level);
+
+			node_index = j;
+			num_nodes_at_next_lvl += num_children;
+			parent_node_index++;
+		}
+
+		num_nodes_at_lvl = num_nodes_at_next_lvl;
+		level--;
+
+		/* Reset the index for the cpu power domain array */
+		if (level == PSCI_CPU_PWR_LVL)
+			node_index = 0;
+	}
+
+	/* Validate the sanity of the array exported by the platform */
+	assert(j == PLATFORM_CORE_COUNT);
+}
+
+/*******************************************************************************
+ * This function does the architectural setup and takes the warm boot
+ * entry-point `mailbox_ep` as an argument. The function also initializes the
+ * power domain topology tree by querying the platform. The power domain nodes
+ * higher than the CPU are populated in the array psci_non_cpu_pd_nodes[] and
+ * the CPU power domains are populated in psci_cpu_pd_nodes[]. The platform
+ * exports its static topology map through the
+ * populate_power_domain_topology_tree() API. The algorithm populates the
+ * psci_non_cpu_pd_nodes and psci_cpu_pd_nodes iteratively by using this
+ * topology map. On a platform that implements two clusters of 2 cpus each,
+ * and supports 3 domain levels, the populated psci_non_cpu_pd_nodes would
+ * look like this:
+ *
+ * ---------------------------------------------------
+ * | system node | cluster 0 node  | cluster 1 node  |
+ * ---------------------------------------------------
+ *
+ * And the populated psci_cpu_pd_nodes would look like this:
+ * <-  cpus cluster0   -><-  cpus cluster1   ->
+ * ------------------------------------------------
+ * |   CPU 0   |   CPU 1   |   CPU 2   |   CPU 3  |
+ * ------------------------------------------------
+ ******************************************************************************/
+int psci_setup(const psci_lib_args_t *lib_args)
+{
+	const unsigned char *topology_tree;
+
+	assert(VERIFY_PSCI_LIB_ARGS_V1(lib_args));
+
+	/* Do the Architectural initialization */
+	psci_arch_setup();
+
+	/* Query the topology map from the platform */
+	topology_tree = plat_get_power_domain_tree_desc();
+
+	/* Populate the power domain arrays using the platform topology map */
+	populate_power_domain_tree(topology_tree);
+
+	/* Update the CPU limits for each node in psci_non_cpu_pd_nodes */
+	psci_update_pwrlvl_limits();
+
+	/* Populate the mpidr field of the cpu node for this CPU */
+	psci_cpu_pd_nodes[plat_my_core_pos()].mpidr =
+		read_mpidr() & MPIDR_AFFINITY_MASK;
+
+	psci_init_req_local_pwr_states();
+
+	/*
+	 * Set the requested and target state of this CPU and all the higher
+	 * power domain levels for this CPU to run.
+	 */
+	psci_set_pwr_domains_to_run(PLAT_MAX_PWR_LVL);
+
+	plat_setup_psci_ops((uintptr_t)lib_args->mailbox_ep, &psci_plat_pm_ops);
+	assert(psci_plat_pm_ops);
+
+	/*
+	 * Flush `psci_plat_pm_ops` as it will be accessed by secondary CPUs
+	 * during warm boot, possibly before the data cache is enabled.
+ */ + psci_flush_dcache_range((uintptr_t)&psci_plat_pm_ops, + sizeof(psci_plat_pm_ops)); + + /* Initialize the psci capability */ + psci_caps = PSCI_GENERIC_CAP; + + if (psci_plat_pm_ops->pwr_domain_off) + psci_caps |= define_psci_cap(PSCI_CPU_OFF); + if (psci_plat_pm_ops->pwr_domain_on && + psci_plat_pm_ops->pwr_domain_on_finish) + psci_caps |= define_psci_cap(PSCI_CPU_ON_AARCH64); + if (psci_plat_pm_ops->pwr_domain_suspend && + psci_plat_pm_ops->pwr_domain_suspend_finish) { + psci_caps |= define_psci_cap(PSCI_CPU_SUSPEND_AARCH64); + if (psci_plat_pm_ops->get_sys_suspend_power_state) + psci_caps |= define_psci_cap(PSCI_SYSTEM_SUSPEND_AARCH64); + } + if (psci_plat_pm_ops->system_off) + psci_caps |= define_psci_cap(PSCI_SYSTEM_OFF); + if (psci_plat_pm_ops->system_reset) + psci_caps |= define_psci_cap(PSCI_SYSTEM_RESET); + if (psci_plat_pm_ops->get_node_hw_state) + psci_caps |= define_psci_cap(PSCI_NODE_HW_STATE_AARCH64); + if (psci_plat_pm_ops->read_mem_protect && + psci_plat_pm_ops->write_mem_protect) + psci_caps |= define_psci_cap(PSCI_MEM_PROTECT); + if (psci_plat_pm_ops->mem_protect_chk) + psci_caps |= define_psci_cap(PSCI_MEM_CHK_RANGE_AARCH64); + if (psci_plat_pm_ops->system_reset2) + psci_caps |= define_psci_cap(PSCI_SYSTEM_RESET2_AARCH64); + +#if ENABLE_PSCI_STAT + psci_caps |= define_psci_cap(PSCI_STAT_RESIDENCY_AARCH64); + psci_caps |= define_psci_cap(PSCI_STAT_COUNT_AARCH64); +#endif + + return 0; +} + +/******************************************************************************* + * This duplicates what the primary cpu did after a cold boot in BL1. The same + * needs to be done when a cpu is hotplugged in. This function could also over- + * ride any EL3 setup done by BL1 as this code resides in rw memory. + ******************************************************************************/ +void psci_arch_setup(void) +{ + /* Program the counter frequency */ + write_cntfrq_el0(plat_get_syscnt_freq2()); + + /* Initialize the cpu_ops pointer. */ + init_cpu_ops(); + + /* Having initialized cpu_ops, we can now print errata status */ + print_errata_status(); +} + +/****************************************************************************** + * PSCI Library interface to initialize the cpu context for the next non + * secure image during cold boot. The relevant registers in the cpu context + * need to be retrieved and programmed on return from this interface. + *****************************************************************************/ +void psci_prepare_next_non_secure_ctx(entry_point_info_t *next_image_info) +{ + assert(GET_SECURITY_STATE(next_image_info->h.attr) == NON_SECURE); + cm_init_my_context(next_image_info); + cm_prepare_el3_exit(NON_SECURE); +} diff --git a/lib/psci/psci_stat.c b/lib/psci/psci_stat.c new file mode 100644 index 00000000..3e79c5d7 --- /dev/null +++ b/lib/psci/psci_stat.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <assert.h> +#include <debug.h> +#include <platform.h> +#include <platform_def.h> +#include "psci_private.h" + +#ifndef PLAT_MAX_PWR_LVL_STATES +#define PLAT_MAX_PWR_LVL_STATES 2 +#endif + +/* Following structure is used for PSCI STAT */ +typedef struct psci_stat { + u_register_t residency; + u_register_t count; +} psci_stat_t; + +/* + * Following is used to keep track of the last cpu + * that goes to power down in non cpu power domains. 
+ */
+/*
+ * Use a GCC range-designated initializer so that *every* entry starts at -1;
+ * a plain {-1} would only set the first element and leave the rest at 0,
+ * which is a valid CPU index.
+ */
+static int last_cpu_in_non_cpu_pd[PSCI_NUM_NON_CPU_PWR_DOMAINS] = {
+		[0 ... PSCI_NUM_NON_CPU_PWR_DOMAINS - 1] = -1};
+
+/*
+ * Following are used to store PSCI STAT values for
+ * CPU and non CPU power domains.
+ */
+static psci_stat_t psci_cpu_stat[PLATFORM_CORE_COUNT]
+				[PLAT_MAX_PWR_LVL_STATES];
+static psci_stat_t psci_non_cpu_stat[PSCI_NUM_NON_CPU_PWR_DOMAINS]
+				[PLAT_MAX_PWR_LVL_STATES];
+
+/*
+ * This function returns the index into the `psci_stat_t` array given the
+ * local power state and power domain level. If the platform implements the
+ * `get_pwr_lvl_state_idx` pm hook, then that will be used to return the index.
+ */
+static int get_stat_idx(plat_local_state_t local_state, int pwr_lvl)
+{
+	int idx;
+
+	if (psci_plat_pm_ops->get_pwr_lvl_state_idx == NULL) {
+		assert(PLAT_MAX_PWR_LVL_STATES == 2);
+		if (is_local_state_retn(local_state))
+			return 0;
+
+		assert(is_local_state_off(local_state));
+		return 1;
+	}
+
+	idx = psci_plat_pm_ops->get_pwr_lvl_state_idx(local_state, pwr_lvl);
+	assert((idx >= 0) && (idx < PLAT_MAX_PWR_LVL_STATES));
+	return idx;
+}
+
+/*******************************************************************************
+ * This function is passed the target local power states for each power
+ * domain (state_info) between the current CPU domain and its ancestors until
+ * the target power level (end_pwrlvl).
+ *
+ * Then, for each level (apart from the CPU level) until the 'end_pwrlvl', it
+ * updates `last_cpu_in_non_cpu_pd[]` with the id of the last CPU to power
+ * down.
+ *
+ * This function will only be invoked with data cache enabled and while
+ * powering down a core.
+ ******************************************************************************/
+void psci_stats_update_pwr_down(unsigned int end_pwrlvl,
+			const psci_power_state_t *state_info)
+{
+	unsigned int lvl, parent_idx, cpu_idx = plat_my_core_pos();
+
+	assert(end_pwrlvl <= PLAT_MAX_PWR_LVL);
+	assert(state_info);
+
+	parent_idx = psci_cpu_pd_nodes[cpu_idx].parent_node;
+
+	for (lvl = PSCI_CPU_PWR_LVL + 1; lvl <= end_pwrlvl; lvl++) {
+
+		/* Break early if the target power state is RUN */
+		if (is_local_state_run(state_info->pwr_domain_state[lvl]))
+			break;
+
+		/*
+		 * The power domain is entering a low power state, so this is
+		 * the last CPU for this power domain
+		 */
+		last_cpu_in_non_cpu_pd[parent_idx] = cpu_idx;
+
+		parent_idx = psci_non_cpu_pd_nodes[parent_idx].parent_node;
+	}
+}
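For platforms with more than two local states per level, get_stat_idx() above defers to the optional get_pwr_lvl_state_idx platform hook. Below is a sketch of what such a hook might look like; the MYPLAT_* state encodings are hypothetical and not part of this patch.

    /*
     * Illustrative only: map hypothetical platform state IDs to stat indices
     * for a platform built with PLAT_MAX_PWR_LVL_STATES == 3.
     */
    #define MYPLAT_RETENTION_STATE	1	/* assumed platform encoding */
    #define MYPLAT_OFF_STATE		2
    #define MYPLAT_MEM_OFF_STATE	3

    static int myplat_get_pwr_lvl_state_idx(plat_local_state_t pwr_domain_state,
					    int pwrlvl)
    {
    	switch (pwr_domain_state) {
    	case MYPLAT_RETENTION_STATE:
    		return 0;
    	case MYPLAT_OFF_STATE:
    		return 1;
    	case MYPLAT_MEM_OFF_STATE:
    		return 2;	/* deepest state gets its own stat bucket */
    	default:
    		return -1;	/* get_stat_idx() asserts on out-of-range indices */
    	}
    }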
+/*******************************************************************************
+ * This function updates the PSCI STATs (residency time and count) for CPU
+ * and non-CPU power domains.
+ * It is called with caches enabled and locks acquired (for non-CPU domains).
+ ******************************************************************************/
+void psci_stats_update_pwr_up(unsigned int end_pwrlvl,
+			const psci_power_state_t *state_info)
+{
+	unsigned int lvl, parent_idx, cpu_idx = plat_my_core_pos();
+	int stat_idx;
+	plat_local_state_t local_state;
+	u_register_t residency;
+
+	assert(end_pwrlvl <= PLAT_MAX_PWR_LVL);
+	assert(state_info);
+
+	/* Get the index into the stats array */
+	local_state = state_info->pwr_domain_state[PSCI_CPU_PWR_LVL];
+	stat_idx = get_stat_idx(local_state, PSCI_CPU_PWR_LVL);
+
+	/* Call into platform interface to calculate residency. */
+	residency = plat_psci_stat_get_residency(PSCI_CPU_PWR_LVL,
+	    state_info, cpu_idx);
+
+	/* Update CPU stats. */
+	psci_cpu_stat[cpu_idx][stat_idx].residency += residency;
+	psci_cpu_stat[cpu_idx][stat_idx].count++;
+
+	/*
+	 * Check what power domains above the CPU were off
+	 * prior to this CPU powering on.
+	 */
+	parent_idx = psci_cpu_pd_nodes[cpu_idx].parent_node;
+	for (lvl = PSCI_CPU_PWR_LVL + 1; lvl <= end_pwrlvl; lvl++) {
+		local_state = state_info->pwr_domain_state[lvl];
+		if (is_local_state_run(local_state)) {
+			/* Break early */
+			break;
+		}
+
+		assert(last_cpu_in_non_cpu_pd[parent_idx] != -1);
+
+		/* Call into platform interface to calculate residency. */
+		residency = plat_psci_stat_get_residency(lvl, state_info,
+		    last_cpu_in_non_cpu_pd[parent_idx]);
+
+		/* Initialize back to reset value */
+		last_cpu_in_non_cpu_pd[parent_idx] = -1;
+
+		/* Get the index into the stats array */
+		stat_idx = get_stat_idx(local_state, lvl);
+
+		/* Update non-CPU stats */
+		psci_non_cpu_stat[parent_idx][stat_idx].residency += residency;
+		psci_non_cpu_stat[parent_idx][stat_idx].count++;
+
+		parent_idx = psci_non_cpu_pd_nodes[parent_idx].parent_node;
+	}
+}
+
+/*******************************************************************************
+ * This function returns the appropriate count and residency time of the
+ * local state for the highest power level expressed in the `power_state`
+ * for the node represented by `target_cpu`.
+ ******************************************************************************/
+static int psci_get_stat(u_register_t target_cpu, unsigned int power_state,
+			 psci_stat_t *psci_stat)
+{
+	int rc;
+	/* plat_core_pos_by_mpidr() returns an int, so keep target_idx signed */
+	int target_idx;
+	unsigned int pwrlvl, lvl, parent_idx, stat_idx;
+	psci_power_state_t state_info = { {PSCI_LOCAL_STATE_RUN} };
+	plat_local_state_t local_state;
+
+	/* Validate the target_cpu parameter and determine the cpu index */
+	target_idx = plat_core_pos_by_mpidr(target_cpu);
+	if (target_idx == -1)
+		return PSCI_E_INVALID_PARAMS;
+
+	/* Validate the power_state parameter */
+	if (!psci_plat_pm_ops->translate_power_state_by_mpidr)
+		rc = psci_validate_power_state(power_state, &state_info);
+	else
+		rc = psci_plat_pm_ops->translate_power_state_by_mpidr(
+		    target_cpu, power_state, &state_info);
+
+	if (rc != PSCI_E_SUCCESS)
+		return PSCI_E_INVALID_PARAMS;
+
+	/* Find the highest power level */
+	pwrlvl = psci_find_target_suspend_lvl(&state_info);
+	if (pwrlvl == PSCI_INVALID_PWR_LVL) {
+		ERROR("Invalid target power level for PSCI statistics operation\n");
+		panic();
+	}
+
+	/* Get the index into the stats array */
+	local_state = state_info.pwr_domain_state[pwrlvl];
+	stat_idx = get_stat_idx(local_state, pwrlvl);
+
+	if (pwrlvl > PSCI_CPU_PWR_LVL) {
+		/* Get the power domain index */
+		parent_idx = psci_cpu_pd_nodes[target_idx].parent_node;
+		for (lvl = PSCI_CPU_PWR_LVL + 1; lvl < pwrlvl; lvl++)
+			parent_idx = psci_non_cpu_pd_nodes[parent_idx].parent_node;
+
+		/* Get the non-CPU power domain stats */
+		*psci_stat = psci_non_cpu_stat[parent_idx][stat_idx];
+	} else {
+		/* Get the CPU power domain stats */
+		*psci_stat = psci_cpu_stat[target_idx][stat_idx];
+	}
+
+	return PSCI_E_SUCCESS;
+}
+
+/* This is the top level function for the PSCI_STAT_RESIDENCY SMC. */
+u_register_t psci_stat_residency(u_register_t target_cpu,
+			unsigned int power_state)
+{
+	psci_stat_t psci_stat;
+	int rc = psci_get_stat(target_cpu, power_state, &psci_stat);
+
+	if (rc == PSCI_E_SUCCESS)
+		return psci_stat.residency;
+	else
+		return 0;
+}
+
+/* This is the top level function for the PSCI_STAT_COUNT SMC.
*/ +u_register_t psci_stat_count(u_register_t target_cpu, + unsigned int power_state) +{ + psci_stat_t psci_stat; + int rc = psci_get_stat(target_cpu, power_state, &psci_stat); + + if (rc == PSCI_E_SUCCESS) + return psci_stat.count; + else + return 0; +} diff --git a/lib/psci/psci_suspend.c b/lib/psci/psci_suspend.c new file mode 100644 index 00000000..40ecdeea --- /dev/null +++ b/lib/psci/psci_suspend.c @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch.h> +#include <arch_helpers.h> +#include <assert.h> +#include <bl_common.h> +#include <context.h> +#include <context_mgmt.h> +#include <cpu_data.h> +#include <debug.h> +#include <platform.h> +#include <pmf.h> +#include <runtime_instr.h> +#include <stddef.h> +#include "psci_private.h" + +/******************************************************************************* + * This function does generic and platform specific operations after a wake-up + * from standby/retention states at multiple power levels. + ******************************************************************************/ +static void psci_suspend_to_standby_finisher(unsigned int cpu_idx, + unsigned int end_pwrlvl) +{ + psci_power_state_t state_info; + + psci_acquire_pwr_domain_locks(end_pwrlvl, + cpu_idx); + + /* + * Find out which retention states this CPU has exited from until the + * 'end_pwrlvl'. The exit retention state could be deeper than the entry + * state as a result of state coordination amongst other CPUs post wfi. + */ + psci_get_target_local_pwr_states(end_pwrlvl, &state_info); + + /* + * Plat. management: Allow the platform to do operations + * on waking up from retention. + */ + psci_plat_pm_ops->pwr_domain_suspend_finish(&state_info); + + /* + * Set the requested and target state of this CPU and all the higher + * power domain levels for this CPU to run. + */ + psci_set_pwr_domains_to_run(end_pwrlvl); + + psci_release_pwr_domain_locks(end_pwrlvl, + cpu_idx); +} + +/******************************************************************************* + * This function does generic and platform specific suspend to power down + * operations. + ******************************************************************************/ +static void psci_suspend_to_pwrdown_start(unsigned int end_pwrlvl, + entry_point_info_t *ep, + psci_power_state_t *state_info) +{ + unsigned int max_off_lvl = psci_find_max_off_lvl(state_info); + + /* Save PSCI target power level for the suspend finisher handler */ + psci_set_suspend_pwrlvl(end_pwrlvl); + + /* + * Flush the target power level as it might be accessed on power up with + * Data cache disabled. + */ + psci_flush_cpu_data(psci_svc_cpu_data.target_pwrlvl); + + /* + * Call the cpu suspend handler registered by the Secure Payload + * Dispatcher to let it do any book-keeping. If the handler encounters an + * error, it's expected to assert within + */ + if (psci_spd_pm && psci_spd_pm->svc_suspend) + psci_spd_pm->svc_suspend(max_off_lvl); + +#if !HW_ASSISTED_COHERENCY + /* + * Plat. management: Allow the platform to perform any early + * actions required to power down the CPU. This might be useful for + * HW_ASSISTED_COHERENCY = 0 platforms that can safely perform these + * actions with data caches enabled. + */ + if (psci_plat_pm_ops->pwr_domain_suspend_pwrdown_early) + psci_plat_pm_ops->pwr_domain_suspend_pwrdown_early(state_info); +#endif + + /* + * Store the re-entry information for the non-secure world. 
+ */
+	cm_init_my_context(ep);
+
+#if ENABLE_RUNTIME_INSTRUMENTATION
+
+	/*
+	 * Flush cache line so that even if CPU power down happens
+	 * the timestamp update is reflected in memory.
+	 */
+	PMF_CAPTURE_TIMESTAMP(rt_instr_svc,
+		RT_INSTR_ENTER_CFLUSH,
+		PMF_CACHE_MAINT);
+#endif
+
+	/*
+	 * Arch. management. Initiate power down sequence.
+	 * TODO : Introduce a mechanism to query the cache level to flush
+	 * and the cpu-ops power down to perform from the platform.
+	 */
+	psci_do_pwrdown_sequence(max_off_lvl);
+
+#if ENABLE_RUNTIME_INSTRUMENTATION
+	PMF_CAPTURE_TIMESTAMP(rt_instr_svc,
+		RT_INSTR_EXIT_CFLUSH,
+		PMF_NO_CACHE_MAINT);
+#endif
+}
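psci_cpu_suspend_start() below ends by calling the platform's pwr_domain_suspend hook. The sketch that follows is illustrative only and not part of this patch: the MYPLAT_PWRC_OFF_REG address is a hypothetical placeholder, and a CCI-based interconnect is assumed for the coherency exit.

    #include <arch_helpers.h>
    #include <cci.h>
    #include <mmio.h>
    #include <platform.h>
    #include <psci.h>

    #define MYPLAT_PWRC_OFF_REG	0x80001004UL	/* assumed power controller */

    static void myplat_pwr_domain_suspend(const psci_power_state_t *target_state)
    {
    	/*
    	 * If the cluster level is being powered off too, take this cluster
    	 * out of interconnect coherency before it loses power.
    	 */
    	if (target_state->pwr_domain_state[MPIDR_AFFLVL1] == PLAT_MAX_OFF_STATE)
    		cci_disable_snoop_dvm_reqs(MPIDR_AFFLVL1_VAL(read_mpidr()));

    	/* Ask the (hypothetical) power controller to power this core down. */
    	mmio_write_32(MYPLAT_PWRC_OFF_REG, plat_my_core_pos());
    }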
+/*******************************************************************************
+ * Top level handler which is called when a cpu wants to suspend its execution.
+ * It is assumed that along with suspending the cpu power domain, power domains
+ * at higher levels until the target power level will be suspended as well. It
+ * coordinates with the platform to negotiate the target state for each power
+ * domain level up to the target power domain level. It then performs the
+ * generic, architectural and platform setup and state management required to
+ * suspend that power domain level and the power domain levels below it.
+ * e.g. For a cpu that's to be suspended, it could mean programming the
+ * power controller whereas for a cluster that's to be suspended, it will call
+ * the platform specific code which will disable coherency at the interconnect
+ * level if the cpu is the last in the cluster and also program the power
+ * controller.
+ *
+ * All the required parameter checks are performed at the beginning and after
+ * the state transition has been done, no further error is expected and it is
+ * not possible to undo any of the actions taken beyond that point.
+ ******************************************************************************/
+void psci_cpu_suspend_start(entry_point_info_t *ep,
+			unsigned int end_pwrlvl,
+			psci_power_state_t *state_info,
+			unsigned int is_power_down_state)
+{
+	int skip_wfi = 0;
+	unsigned int idx = plat_my_core_pos();
+
+	/*
+	 * This function must only be called on platforms where the
+	 * CPU_SUSPEND platform hooks have been implemented.
+	 */
+	assert(psci_plat_pm_ops->pwr_domain_suspend &&
+	       psci_plat_pm_ops->pwr_domain_suspend_finish);
+
+	/*
+	 * This function acquires the lock corresponding to each power
+	 * level so that by the time all locks are taken, the system topology
+	 * is snapshot and state management can be done safely.
+	 */
+	psci_acquire_pwr_domain_locks(end_pwrlvl,
+				      idx);
+
+	/*
+	 * We check if there are any pending interrupts after the delay
+	 * introduced by lock contention to increase the chances of early
+	 * detection that a wake-up interrupt has fired.
+	 */
+	if (read_isr_el1()) {
+		skip_wfi = 1;
+		goto exit;
+	}
+
+	/*
+	 * This function is passed the requested state info and
+	 * it returns the negotiated state info for each power level up to
+	 * the end level specified.
+	 */
+	psci_do_state_coordination(end_pwrlvl, state_info);
+
+#if ENABLE_PSCI_STAT
+	/* Update the last cpu for each level up to end_pwrlvl */
+	psci_stats_update_pwr_down(end_pwrlvl, state_info);
+#endif
+
+	if (is_power_down_state)
+		psci_suspend_to_pwrdown_start(end_pwrlvl, ep, state_info);
+
+	/*
+	 * Plat. management: Allow the platform to perform the
+	 * necessary actions to turn off this cpu e.g. set the
+	 * platform defined mailbox with the psci entrypoint,
+	 * program the power controller etc.
+	 */
+	psci_plat_pm_ops->pwr_domain_suspend(state_info);
+
+#if ENABLE_PSCI_STAT
+	plat_psci_stat_accounting_start(state_info);
+#endif
+
+exit:
+	/*
+	 * Release the locks corresponding to each power level in the
+	 * reverse order to which they were acquired.
+	 */
+	psci_release_pwr_domain_locks(end_pwrlvl,
+				      idx);
+	if (skip_wfi)
+		return;
+
+	if (is_power_down_state) {
+#if ENABLE_RUNTIME_INSTRUMENTATION
+
+		/*
+		 * Update the timestamp with cache off. We assume this
+		 * timestamp can only be read from the current CPU and the
+		 * timestamp cache line will be flushed before return to
+		 * normal world on wakeup.
+		 */
+		PMF_CAPTURE_TIMESTAMP(rt_instr_svc,
+		    RT_INSTR_ENTER_HW_LOW_PWR,
+		    PMF_NO_CACHE_MAINT);
+#endif
+
+		/* The function calls below must not return */
+		if (psci_plat_pm_ops->pwr_domain_pwr_down_wfi)
+			psci_plat_pm_ops->pwr_domain_pwr_down_wfi(state_info);
+		else
+			psci_power_down_wfi();
+	}
+
+#if ENABLE_RUNTIME_INSTRUMENTATION
+	PMF_CAPTURE_TIMESTAMP(rt_instr_svc,
+	    RT_INSTR_ENTER_HW_LOW_PWR,
+	    PMF_NO_CACHE_MAINT);
+#endif
+
+#if ENABLE_PSCI_STAT
+	plat_psci_stat_accounting_start(state_info);
+#endif
+
+	/*
+	 * We will reach here if only retention/standby states have been
+	 * requested at multiple power levels. This means that the cpu
+	 * context will be preserved.
+	 */
+	wfi();
+
+#if ENABLE_PSCI_STAT
+	plat_psci_stat_accounting_stop(state_info);
+	psci_stats_update_pwr_up(end_pwrlvl, state_info);
+#endif
+
+#if ENABLE_RUNTIME_INSTRUMENTATION
+	PMF_CAPTURE_TIMESTAMP(rt_instr_svc,
+	    RT_INSTR_EXIT_HW_LOW_PWR,
+	    PMF_NO_CACHE_MAINT);
+#endif
+
+	/*
+	 * After we wake up from context retaining suspend, call the
+	 * context retaining suspend finisher.
+	 */
+	psci_suspend_to_standby_finisher(idx, end_pwrlvl);
+}
+
+/*******************************************************************************
+ * This function finishes an earlier suspend request. It is called by the
+ * common finisher routine in psci_common.c. The `state_info` is the
+ * psci_power_state from which this CPU has woken up.
+ ******************************************************************************/
+void psci_cpu_suspend_finish(unsigned int cpu_idx,
+			psci_power_state_t *state_info)
+{
+	unsigned int counter_freq;
+	unsigned int max_off_lvl;
+
+	/* Ensure we have been woken up from a suspended state */
+	assert(psci_get_aff_info_state() == AFF_STATE_ON &&
+	       is_local_state_off(state_info->pwr_domain_state[PSCI_CPU_PWR_LVL]));
+
+	/*
+	 * Plat. management: Perform the platform specific actions
+	 * before we change the state of the cpu e.g. enabling the
+	 * gic or zeroing the mailbox register. If anything goes
+	 * wrong then assert as there is no way to recover from this
+	 * situation.
+	 */
+	psci_plat_pm_ops->pwr_domain_suspend_finish(state_info);
+
+#if !(HW_ASSISTED_COHERENCY || WARMBOOT_ENABLE_DCACHE_EARLY)
+	/* Arch. management: Enable the data cache, stack memory maintenance. */
+	psci_do_pwrup_cache_maintenance();
+#endif
+
+	/* Re-init the cntfrq_el0 register */
+	counter_freq = plat_get_syscnt_freq2();
+	write_cntfrq_el0(counter_freq);
+
+	/*
+	 * Call the cpu suspend finish handler registered by the Secure Payload
+	 * Dispatcher to let it do any bookkeeping.
If the handler encounters an + * error, it's expected to assert within + */ + if (psci_spd_pm && psci_spd_pm->svc_suspend_finish) { + max_off_lvl = psci_find_max_off_lvl(state_info); + assert (max_off_lvl != PSCI_INVALID_PWR_LVL); + psci_spd_pm->svc_suspend_finish(max_off_lvl); + } + + /* Invalidate the suspend level for the cpu */ + psci_set_suspend_pwrlvl(PSCI_INVALID_PWR_LVL); + + /* + * Generic management: Now we just need to retrieve the + * information that we had stashed away during the suspend + * call to set this cpu on its way. + */ + cm_prepare_el3_exit(NON_SECURE); +} diff --git a/lib/psci/psci_system_off.c b/lib/psci/psci_system_off.c new file mode 100644 index 00000000..13e9f4aa --- /dev/null +++ b/lib/psci/psci_system_off.c @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2014-2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch_helpers.h> +#include <assert.h> +#include <console.h> +#include <debug.h> +#include <platform.h> +#include <stddef.h> +#include "psci_private.h" + +void __dead2 psci_system_off(void) +{ + psci_print_power_domain_map(); + + assert(psci_plat_pm_ops->system_off); + + /* Notify the Secure Payload Dispatcher */ + if (psci_spd_pm && psci_spd_pm->svc_system_off) { + psci_spd_pm->svc_system_off(); + } + + console_flush(); + + /* Call the platform specific hook */ + psci_plat_pm_ops->system_off(); + + /* This function does not return. We should never get here */ +} + +void __dead2 psci_system_reset(void) +{ + psci_print_power_domain_map(); + + assert(psci_plat_pm_ops->system_reset); + + /* Notify the Secure Payload Dispatcher */ + if (psci_spd_pm && psci_spd_pm->svc_system_reset) { + psci_spd_pm->svc_system_reset(); + } + + console_flush(); + + /* Call the platform specific hook */ + psci_plat_pm_ops->system_reset(); + + /* This function does not return. We should never get here */ +} + +int psci_system_reset2(uint32_t reset_type, u_register_t cookie) +{ + int is_vendor; + + psci_print_power_domain_map(); + + assert(psci_plat_pm_ops->system_reset2); + + is_vendor = (reset_type >> PSCI_RESET2_TYPE_VENDOR_SHIFT) & 1; + if (!is_vendor) { + /* + * Only WARM_RESET is allowed for architectural type resets. + */ + if (reset_type != PSCI_RESET2_SYSTEM_WARM_RESET) + return PSCI_E_INVALID_PARAMS; + if (psci_plat_pm_ops->write_mem_protect && + psci_plat_pm_ops->write_mem_protect(0) < 0) { + return PSCI_E_NOT_SUPPORTED; + } + } + + /* Notify the Secure Payload Dispatcher */ + if (psci_spd_pm && psci_spd_pm->svc_system_reset) { + psci_spd_pm->svc_system_reset(); + } + console_flush(); + + return psci_plat_pm_ops->system_reset2(is_vendor, reset_type, cookie); +} diff --git a/lib/semihosting/aarch32/semihosting_call.S b/lib/semihosting/aarch32/semihosting_call.S new file mode 100644 index 00000000..aced3d12 --- /dev/null +++ b/lib/semihosting/aarch32/semihosting_call.S @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <asm_macros.S> + + .globl semihosting_call + +func semihosting_call + svc #0x123456 + bx lr +endfunc semihosting_call diff --git a/lib/semihosting/aarch64/semihosting_call.S b/lib/semihosting/aarch64/semihosting_call.S index e6a96752..97d2bcab 100644 --- a/lib/semihosting/aarch64/semihosting_call.S +++ b/lib/semihosting/aarch64/semihosting_call.S @@ -1,31 +1,7 @@ /* * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ #include <asm_macros.S> @@ -35,3 +11,4 @@ func semihosting_call hlt #0xf000 ret +endfunc semihosting_call diff --git a/lib/semihosting/semihosting.c b/lib/semihosting/semihosting.c index 849ec120..2ba43f34 100644 --- a/lib/semihosting/semihosting.c +++ b/lib/semihosting/semihosting.c @@ -1,31 +1,7 @@ /* * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
+ * SPDX-License-Identifier: BSD-3-Clause */ #include <assert.h> @@ -125,6 +101,7 @@ long semihosting_file_write(long file_handle, const uintptr_t buffer) { smh_file_read_write_block_t write_block; + long result = -EINVAL; if ((length == NULL) || (buffer == (uintptr_t)NULL)) return -EINVAL; @@ -133,10 +110,12 @@ long semihosting_file_write(long file_handle, write_block.buffer = (uintptr_t)buffer; /* cast away const */ write_block.length = *length; - *length = semihosting_call(SEMIHOSTING_SYS_WRITE, + result = semihosting_call(SEMIHOSTING_SYS_WRITE, (void *) &write_block); - return *length; + *length = result; + + return (result == 0) ? 0 : -EINVAL; } long semihosting_file_close(long file_handle) diff --git a/lib/stack_protector/aarch32/asm_stack_protector.S b/lib/stack_protector/aarch32/asm_stack_protector.S new file mode 100644 index 00000000..19b75258 --- /dev/null +++ b/lib/stack_protector/aarch32/asm_stack_protector.S @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch.h> +#include <asm_macros.S> +#include <assert_macros.S> + + .globl update_stack_protector_canary + +/* ----------------------------------------------------------------------- + * void update_stack_protector_canary(void) + * + * Change the value of the canary used for stack smashing attacks protection. + * Note: This must be called when it is safe to call C code, but this cannot be + * called by C code. Doing this will make the check fail when the calling + * function returns. + * ----------------------------------------------------------------------- + */ + +func update_stack_protector_canary + /* Use r4 as it is callee-saved */ + mov r4, lr + bl plat_get_stack_protector_canary + + /* Update the canary with the returned value */ + ldr r1, =__stack_chk_guard + str r0, [r1] + bx r4 +endfunc update_stack_protector_canary + + diff --git a/lib/stack_protector/aarch64/asm_stack_protector.S b/lib/stack_protector/aarch64/asm_stack_protector.S new file mode 100644 index 00000000..c2245d36 --- /dev/null +++ b/lib/stack_protector/aarch64/asm_stack_protector.S @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch.h> +#include <asm_macros.S> +#include <assert_macros.S> + + .globl update_stack_protector_canary + +/* ----------------------------------------------------------------------- + * void update_stack_protector_canary(void) + * + * Change the value of the canary used for stack smashing attacks protection. + * Note: This must be called when it is safe to call C code, but this cannot be + * called by C code. Doing this will make the check fail when the calling + * function returns. + * ----------------------------------------------------------------------- + */ + +func update_stack_protector_canary + /* Use x19 as it is callee-saved */ + mov x19, x30 + bl plat_get_stack_protector_canary + + /* Update the canary with the returned value */ + adrp x1, __stack_chk_guard + str x0, [x1, #:lo12:__stack_chk_guard] + ret x19 +endfunc update_stack_protector_canary + + diff --git a/lib/stack_protector/stack_protector.c b/lib/stack_protector/stack_protector.c new file mode 100644 index 00000000..fba5e1ff --- /dev/null +++ b/lib/stack_protector/stack_protector.c @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ +#include <debug.h> +#include <platform.h> +#include <stdint.h> + +/* + * Canary value used by the compiler runtime checks to detect stack corruption. + * + * Force the canary to be in .data to allow predictable memory layout relatively + * to the stacks. + */ +u_register_t __attribute__((section(".data.stack_protector_canary"))) + __stack_chk_guard = (u_register_t) 3288484550995823360ULL; + +/* + * Function called when the stack's canary check fails, which means the stack + * was corrupted. It must not return. + */ +void __dead2 __stack_chk_fail(void) +{ +#if DEBUG + ERROR("Stack corruption detected\n"); +#endif + panic(); +} + diff --git a/lib/stack_protector/stack_protector.mk b/lib/stack_protector/stack_protector.mk new file mode 100644 index 00000000..0f0d90fb --- /dev/null +++ b/lib/stack_protector/stack_protector.mk @@ -0,0 +1,19 @@ +# +# Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +# + +# Boolean macro to be used in C code +STACK_PROTECTOR_ENABLED := 0 + +ifneq (${ENABLE_STACK_PROTECTOR},0) +STACK_PROTECTOR_ENABLED := 1 +BL_COMMON_SOURCES += lib/stack_protector/stack_protector.c \ + lib/stack_protector/${ARCH}/asm_stack_protector.S + +TF_CFLAGS += -fstack-protector-${ENABLE_STACK_PROTECTOR} +endif + +$(eval $(call add_define,STACK_PROTECTOR_ENABLED)) + diff --git a/lib/stdlib/abort.c b/lib/stdlib/abort.c index 862bf9c4..af19ccfa 100644 --- a/lib/stdlib/abort.c +++ b/lib/stdlib/abort.c @@ -1,31 +1,7 @@ /* * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ #include <debug.h> diff --git a/lib/stdlib/assert.c b/lib/stdlib/assert.c index 90a1afe5..97fab4b0 100644 --- a/lib/stdlib/assert.c +++ b/lib/stdlib/assert.c @@ -1,41 +1,36 @@ /* - * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ +#include <assert.h> +#include <console.h> #include <debug.h> +#include <platform.h> /* - * This is a basic implementation. This could be improved. - */ -void __assert (const char *function, const char *file, unsigned int line, - const char *assertion) +* Only print the output if PLAT_LOG_LEVEL_ASSERT is higher or equal to +* LOG_LEVEL_INFO, which is the default value for builds with DEBUG=1. +*/ + +#if PLAT_LOG_LEVEL_ASSERT >= LOG_LEVEL_VERBOSE +void __assert(const char *file, unsigned int line, const char *assertion) +{ + tf_printf("ASSERT: %s:%d:%s\n", file, line, assertion); + console_flush(); + plat_panic_handler(); +} +#elif PLAT_LOG_LEVEL_ASSERT >= LOG_LEVEL_INFO +void __assert(const char *file, unsigned int line) +{ + tf_printf("ASSERT: %s:%d\n", file, line); + console_flush(); + plat_panic_handler(); +} +#else +void __assert(void) { - tf_printf("ASSERT: %s <%d> : %s\n", function, line, assertion); - while(1); + plat_panic_handler(); } +#endif diff --git a/lib/stdlib/exit.c b/lib/stdlib/exit.c index 3e775914..3d23d7be 100644 --- a/lib/stdlib/exit.c +++ b/lib/stdlib/exit.c @@ -1,31 +1,7 @@ /* * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ #include <debug.h> diff --git a/lib/stdlib/mem.c b/lib/stdlib/mem.c index f1f335a6..65b62fde 100644 --- a/lib/stdlib/mem.c +++ b/lib/stdlib/mem.c @@ -1,31 +1,7 @@ /* * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ #include <stddef.h> /* size_t */ @@ -48,10 +24,10 @@ void *memset(void *dst, int val, size_t count) */ int memcmp(const void *s1, const void *s2, size_t len) { - const char *s = s1; - const char *d = s2; - char dc; - char sc; + const unsigned char *s = s1; + const unsigned char *d = s2; + unsigned char sc; + unsigned char dc; while (len--) { sc = *s++; diff --git a/lib/stdlib/printf.c b/lib/stdlib/printf.c index 323ec0f8..f6156414 100644 --- a/lib/stdlib/printf.c +++ b/lib/stdlib/printf.c @@ -1,31 +1,7 @@ /* * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. 
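The mem.c change above, switching memcmp() from plain char to unsigned char, is a correctness fix rather than a cleanup: ISO C requires memcmp() to compare bytes as unsigned char, and on targets where plain char is signed, bytes of 0x80 and above would otherwise compare as negative. A small self-contained illustration (hypothetical test code, not part of the patch):

	#include <assert.h>
	#include <string.h>

	static void memcmp_sign_demo(void)
	{
		/* 0x80 as a signed char is -128, so a signed comparison
		 * would wrongly report a < b. Comparing as unsigned char,
		 * as ISO C requires, yields 0x80 > 0x01. */
		const char a[1] = { (char)0x80 };
		const char b[1] = { 0x01 };

		assert(memcmp(a, b, 1) > 0);
	}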
- * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ #include <stdio.h> diff --git a/lib/stdlib/putchar.c b/lib/stdlib/putchar.c index 85e4fbdf..8265667b 100644 --- a/lib/stdlib/putchar.c +++ b/lib/stdlib/putchar.c @@ -1,31 +1,7 @@ /* * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ #include <stdio.h> diff --git a/lib/stdlib/puts.c b/lib/stdlib/puts.c index ca88fc5c..693a6bff 100644 --- a/lib/stdlib/puts.c +++ b/lib/stdlib/puts.c @@ -1,31 +1,7 @@ /* * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * SPDX-License-Identifier: BSD-3-Clause */ #include <stdio.h> diff --git a/lib/stdlib/sscanf.c b/lib/stdlib/sscanf.c index e9f5c4a1..674ae79f 100644 --- a/lib/stdlib/sscanf.c +++ b/lib/stdlib/sscanf.c @@ -1,31 +1,7 @@ /* * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved. * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
+ * SPDX-License-Identifier: BSD-3-Clause */ #include <sys/cdefs.h> diff --git a/lib/stdlib/std.c b/lib/stdlib/std.c deleted file mode 100644 index 5f6ef752..00000000 --- a/lib/stdlib/std.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * Neither the name of ARM nor the names of its contributors may be used - * to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - - -/* Include the various implemented functions */ -#include "abort.c" -#include "assert.c" -#include "exit.c" -#include "mem.c" -#include "printf.c" -#include "putchar.c" -#include "puts.c" -#include "sscanf.c" -#include "strchr.c" -#include "strcmp.c" -#include "strlen.c" -#include "strncmp.c" -#include "subr_prf.c" diff --git a/lib/stdlib/stdlib.mk b/lib/stdlib/stdlib.mk new file mode 100644 index 00000000..82116235 --- /dev/null +++ b/lib/stdlib/stdlib.mk @@ -0,0 +1,25 @@ +# +# Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +# + +STDLIB_SRCS := $(addprefix lib/stdlib/, \ + abort.c \ + assert.c \ + exit.c \ + mem.c \ + printf.c \ + putchar.c \ + puts.c \ + sscanf.c \ + strchr.c \ + strcmp.c \ + strlen.c \ + strncmp.c \ + strnlen.c \ + subr_prf.c \ + timingsafe_bcmp.c) + +INCLUDES += -Iinclude/lib/stdlib \ + -Iinclude/lib/stdlib/sys diff --git a/lib/stdlib/strnlen.c b/lib/stdlib/strnlen.c new file mode 100644 index 00000000..d48502bd --- /dev/null +++ b/lib/stdlib/strnlen.c @@ -0,0 +1,45 @@ +/*- + * Copyright (c) 2009 David Schultz <das@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Portions copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ */
+
+#include <sys/cdefs.h>
+
+#include <string.h>
+
+size_t
+strnlen(const char *s, size_t maxlen)
+{
+	size_t len;
+
+	for (len = 0; len < maxlen; len++, s++) {
+		if (!*s)
+			break;
+	}
+	return (len);
+}
diff --git a/lib/stdlib/timingsafe_bcmp.c b/lib/stdlib/timingsafe_bcmp.c
new file mode 100644
index 00000000..d0981580
--- /dev/null
+++ b/lib/stdlib/timingsafe_bcmp.c
@@ -0,0 +1,36 @@
+/*	$OpenBSD: timingsafe_bcmp.c,v 1.3 2015/08/31 02:53:57 guenther Exp $	*/
+/*
+ * Copyright (c) 2010 Damien Miller. All rights reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <string.h>
+
+int __timingsafe_bcmp(const void *, const void *, size_t);
+
+int
+__timingsafe_bcmp(const void *b1, const void *b2, size_t n)
+{
+	const unsigned char *p1 = b1, *p2 = b2;
+	int ret = 0;
+
+	for (; n > 0; n--)
+		ret |= *p1++ ^ *p2++;
+	return (ret != 0);
+}
+
+__weak_reference(__timingsafe_bcmp, timingsafe_bcmp);
diff --git a/lib/utils/mem_region.c b/lib/utils/mem_region.c
new file mode 100644
index 00000000..31c6231f
--- /dev/null
+++ b/lib/utils/mem_region.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <assert.h>
+#include <utils.h>
+
+/*
+ * All the regions defined in a mem_region_t array must have the following
+ * properties:
+ *
+ * - Any contiguous regions must be merged into a single entry.
+ * - The number of bytes of each region must be greater than zero.
+ * - The calculation of the highest address within the region (base + nbytes-1)
+ *   must not produce an overflow.
+ *
+ * These conditions must be fulfilled by the caller and they aren't checked
+ * at runtime.
+ */
+
+/*
+ * Zero (using zero_normalmem()) all the regions defined in tbl.
+ * It assumes that the MMU is enabled and the memory is mapped as Normal memory.
+ * tbl must be a valid pointer to an array of mem_region_t entries and
+ * nregions is the number of entries in the array.
+ */
+void clear_mem_regions(mem_region_t *tbl, size_t nregions)
+{
+	size_t i;
+
+	assert(tbl);
+	assert(nregions > 0);
+
+	for (i = 0; i < nregions; i++) {
+		assert(tbl->nbytes > 0);
+		assert(!check_uptr_overflow(tbl->base, tbl->nbytes-1));
+		zero_normalmem((void *) (tbl->base), tbl->nbytes);
+		tbl++;
+	}
+}
+
+/*
+ * This function checks that a region of memory (addr to addr + nbytes-1) is
+ * totally covered by one of the regions defined in tbl.
+ * tbl must be a valid pointer to an array of mem_region_t entries, nregions
+ * is the number of entries in the array, and the region described by addr
+ * and nbytes must not generate an overflow.
+ * Returns:
+ *  -1 if the region is not covered by any of the regions described in tbl.
+ *   0 if the region (addr to addr + nbytes-1) is covered by one of the
+ *     regions described in tbl.
+ */
+int mem_region_in_array_chk(mem_region_t *tbl, size_t nregions,
+			    uintptr_t addr, size_t nbytes)
+{
+	uintptr_t region_start, region_end, start, end;
+	size_t i;
+
+	assert(tbl);
+	assert(nbytes > 0);
+	assert(!check_uptr_overflow(addr, nbytes-1));
+
+	region_start = addr;
+	region_end = addr + (nbytes - 1);
+	for (i = 0; i < nregions; i++) {
+		assert(tbl->nbytes > 0);
+		assert(!check_uptr_overflow(tbl->base, tbl->nbytes-1));
+		start = tbl->base;
+		end = start + (tbl->nbytes - 1);
+		if (region_start >= start && region_end <= end)
+			return 0;
+		tbl++;
+	}
+
+	return -1;
+}
diff --git a/lib/xlat_tables/aarch32/xlat_tables.c b/lib/xlat_tables/aarch32/xlat_tables.c
new file mode 100644
index 00000000..c7e34f20
--- /dev/null
+++ b/lib/xlat_tables/aarch32/xlat_tables.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <arch_helpers.h>
+#include <assert.h>
+#include <platform_def.h>
+#include <utils.h>
+#include <xlat_tables_arch.h>
+#include <xlat_tables.h>
+#include "../xlat_tables_private.h"
+
+#define XLAT_TABLE_LEVEL_BASE	\
+	GET_XLAT_TABLE_LEVEL_BASE(PLAT_VIRT_ADDR_SPACE_SIZE)
+
+#define NUM_BASE_LEVEL_ENTRIES	\
+	GET_NUM_BASE_LEVEL_ENTRIES(PLAT_VIRT_ADDR_SPACE_SIZE)
+
+static uint64_t base_xlation_table[NUM_BASE_LEVEL_ENTRIES]
+		__aligned(NUM_BASE_LEVEL_ENTRIES * sizeof(uint64_t));
+
+#if ENABLE_ASSERTIONS
+static unsigned long long get_max_supported_pa(void)
+{
+	/* Physical address space size for long descriptor format. */
+	return (1ULL << 40) - 1ULL;
+}
+#endif /* ENABLE_ASSERTIONS */
+
+int xlat_arch_current_el(void)
+{
+	/*
+	 * If EL3 is in AArch32 mode, all secure PL1 modes (Monitor, System,
+	 * SVC, Abort, UND, IRQ and FIQ modes) execute at EL3.
+	 */
+	return 3;
+}
+
+uint64_t xlat_arch_get_xn_desc(int el __unused)
+{
+	return UPPER_ATTRS(XN);
+}
+
+void init_xlat_tables(void)
+{
+	unsigned long long max_pa;
+	uintptr_t max_va;
+	print_mmap();
+	init_xlation_table(0, base_xlation_table, XLAT_TABLE_LEVEL_BASE,
+			   &max_va, &max_pa);
+
+	assert(max_va <= PLAT_VIRT_ADDR_SPACE_SIZE - 1);
+	assert(max_pa <= PLAT_PHY_ADDR_SPACE_SIZE - 1);
+	assert((PLAT_PHY_ADDR_SPACE_SIZE - 1) <= get_max_supported_pa());
+}
+
+/*******************************************************************************
+ * Function for enabling the MMU in Secure PL1, assuming that the
+ * page tables have already been created.
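A short usage sketch for the two mem_region helpers above (region bases, sizes and the caller are made up for illustration): a loader might scrub its DRAM windows and then verify that an image sits wholly inside one of them before using it.

	#include <debug.h>
	#include <utils.h>

	/* Hypothetical layout; real platforms define their own regions. */
	static mem_region_t dram_regions[] = {
		{ .base = 0x80000000, .nbytes = 0x40000000 },	/* 1 GiB DRAM0 */
		{ .base = 0xC0000000, .nbytes = 0x10000000 },	/* 256 MiB DRAM1 */
	};

	static void scrub_and_check(uintptr_t img_base, size_t img_size)
	{
		/* Zero every region (MMU on, memory mapped as Normal). */
		clear_mem_regions(dram_regions, ARRAY_SIZE(dram_regions));

		/* -1 means the image crosses or escapes the regions. */
		if (mem_region_in_array_chk(dram_regions,
					    ARRAY_SIZE(dram_regions),
					    img_base, img_size) != 0)
			panic();
	}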
+ ******************************************************************************/ +void enable_mmu_secure(unsigned int flags) +{ + unsigned int mair0, ttbcr, sctlr; + uint64_t ttbr0; + + assert(IS_IN_SECURE()); + assert((read_sctlr() & SCTLR_M_BIT) == 0); + + /* Set attributes in the right indices of the MAIR */ + mair0 = MAIR0_ATTR_SET(ATTR_DEVICE, ATTR_DEVICE_INDEX); + mair0 |= MAIR0_ATTR_SET(ATTR_IWBWA_OWBWA_NTR, + ATTR_IWBWA_OWBWA_NTR_INDEX); + mair0 |= MAIR0_ATTR_SET(ATTR_NON_CACHEABLE, + ATTR_NON_CACHEABLE_INDEX); + write_mair0(mair0); + + /* Invalidate TLBs at the current exception level */ + tlbiall(); + + /* + * Set TTBCR bits as well. Set TTBR0 table properties. Disable TTBR1. + */ + if (flags & XLAT_TABLE_NC) { + /* Inner & outer non-cacheable non-shareable. */ + ttbcr = TTBCR_EAE_BIT | + TTBCR_SH0_NON_SHAREABLE | TTBCR_RGN0_OUTER_NC | + TTBCR_RGN0_INNER_NC | + (32 - __builtin_ctzll(PLAT_VIRT_ADDR_SPACE_SIZE)); + } else { + /* Inner & outer WBWA & shareable. */ + ttbcr = TTBCR_EAE_BIT | + TTBCR_SH0_INNER_SHAREABLE | TTBCR_RGN0_OUTER_WBA | + TTBCR_RGN0_INNER_WBA | + (32 - __builtin_ctzll(PLAT_VIRT_ADDR_SPACE_SIZE)); + } + ttbcr |= TTBCR_EPD1_BIT; + write_ttbcr(ttbcr); + + /* Set TTBR0 bits as well */ + ttbr0 = (uintptr_t) base_xlation_table; + write64_ttbr0(ttbr0); + write64_ttbr1(0); + + /* + * Ensure all translation table writes have drained + * into memory, the TLB invalidation is complete, + * and translation register writes are committed + * before enabling the MMU + */ + dsbish(); + isb(); + + sctlr = read_sctlr(); + sctlr |= SCTLR_WXN_BIT | SCTLR_M_BIT; + + if (flags & DISABLE_DCACHE) + sctlr &= ~SCTLR_C_BIT; + else + sctlr |= SCTLR_C_BIT; + + write_sctlr(sctlr); + + /* Ensure the MMU enable takes effect immediately */ + isb(); +} diff --git a/lib/xlat_tables/aarch64/xlat_tables.c b/lib/xlat_tables/aarch64/xlat_tables.c new file mode 100644 index 00000000..28ae1f73 --- /dev/null +++ b/lib/xlat_tables/aarch64/xlat_tables.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2014-2017, ARM Limited and Contributors. All rights reserved. 
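Before moving on to the AArch64 version, note the T0SZ arithmetic in enable_mmu_secure() above: TTBCR.T0SZ must hold 32 minus the width of the virtual address space, and `32 - __builtin_ctzll(PLAT_VIRT_ADDR_SPACE_SIZE)` computes that correctly only because the size is constrained to be a power of two, so count-trailing-zeros equals log2. A small illustrative helper (not part of the patch) with worked values:

	/* Illustration only: T0SZ for a power-of-two address space size. */
	static unsigned int t0sz_for(unsigned long long va_space_size)
	{
		/* ctz == log2 only holds for powers of two. */
		return 32U - (unsigned int)__builtin_ctzll(va_space_size);
	}

	/*
	 * t0sz_for(1ULL << 32) == 0    full 4 GiB space
	 * t0sz_for(1ULL << 30) == 2    1 GiB space
	 * t0sz_for(1ULL << 20) == 12   1 MiB space
	 */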
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch.h> +#include <arch_helpers.h> +#include <assert.h> +#include <bl_common.h> +#include <common_def.h> +#include <platform_def.h> +#include <sys/types.h> +#include <utils.h> +#include <xlat_tables.h> +#include <xlat_tables_arch.h> +#include "../xlat_tables_private.h" + +#define XLAT_TABLE_LEVEL_BASE \ + GET_XLAT_TABLE_LEVEL_BASE(PLAT_VIRT_ADDR_SPACE_SIZE) + +#define NUM_BASE_LEVEL_ENTRIES \ + GET_NUM_BASE_LEVEL_ENTRIES(PLAT_VIRT_ADDR_SPACE_SIZE) + +static uint64_t base_xlation_table[NUM_BASE_LEVEL_ENTRIES] + __aligned(NUM_BASE_LEVEL_ENTRIES * sizeof(uint64_t)); + +static unsigned long long tcr_ps_bits; + +static unsigned long long calc_physical_addr_size_bits( + unsigned long long max_addr) +{ + /* Physical address can't exceed 48 bits */ + assert((max_addr & ADDR_MASK_48_TO_63) == 0); + + /* 48 bits address */ + if (max_addr & ADDR_MASK_44_TO_47) + return TCR_PS_BITS_256TB; + + /* 44 bits address */ + if (max_addr & ADDR_MASK_42_TO_43) + return TCR_PS_BITS_16TB; + + /* 42 bits address */ + if (max_addr & ADDR_MASK_40_TO_41) + return TCR_PS_BITS_4TB; + + /* 40 bits address */ + if (max_addr & ADDR_MASK_36_TO_39) + return TCR_PS_BITS_1TB; + + /* 36 bits address */ + if (max_addr & ADDR_MASK_32_TO_35) + return TCR_PS_BITS_64GB; + + return TCR_PS_BITS_4GB; +} + +#if ENABLE_ASSERTIONS +/* Physical Address ranges supported in the AArch64 Memory Model */ +static const unsigned int pa_range_bits_arr[] = { + PARANGE_0000, PARANGE_0001, PARANGE_0010, PARANGE_0011, PARANGE_0100, + PARANGE_0101 +}; + +static unsigned long long get_max_supported_pa(void) +{ + u_register_t pa_range = read_id_aa64mmfr0_el1() & + ID_AA64MMFR0_EL1_PARANGE_MASK; + + /* All other values are reserved */ + assert(pa_range < ARRAY_SIZE(pa_range_bits_arr)); + + return (1ULL << pa_range_bits_arr[pa_range]) - 1ULL; +} +#endif /* ENABLE_ASSERTIONS */ + +int xlat_arch_current_el(void) +{ + int el = GET_EL(read_CurrentEl()); + + assert(el > 0); + + return el; +} + +uint64_t xlat_arch_get_xn_desc(int el) +{ + if (el == 3) { + return UPPER_ATTRS(XN); + } else { + assert(el == 1); + return UPPER_ATTRS(PXN); + } +} + +void init_xlat_tables(void) +{ + unsigned long long max_pa; + uintptr_t max_va; + print_mmap(); + init_xlation_table(0, base_xlation_table, XLAT_TABLE_LEVEL_BASE, + &max_va, &max_pa); + + assert(max_va <= PLAT_VIRT_ADDR_SPACE_SIZE - 1); + assert(max_pa <= PLAT_PHY_ADDR_SPACE_SIZE - 1); + assert((PLAT_PHY_ADDR_SPACE_SIZE - 1) <= get_max_supported_pa()); + + tcr_ps_bits = calc_physical_addr_size_bits(max_pa); +} + +/******************************************************************************* + * Macro generating the code for the function enabling the MMU in the given + * exception level, assuming that the pagetables have already been created. + * + * _el: Exception level at which the function will run + * _tcr_extra: Extra bits to set in the TCR register. This mask will + * be OR'ed with the default TCR value. 
+ * _tlbi_fct: Function to invalidate the TLBs at the current + * exception level + ******************************************************************************/ +#define DEFINE_ENABLE_MMU_EL(_el, _tcr_extra, _tlbi_fct) \ + void enable_mmu_el##_el(unsigned int flags) \ + { \ + uint64_t mair, tcr, ttbr; \ + uint32_t sctlr; \ + \ + assert(IS_IN_EL(_el)); \ + assert((read_sctlr_el##_el() & SCTLR_M_BIT) == 0); \ + \ + /* Set attributes in the right indices of the MAIR */ \ + mair = MAIR_ATTR_SET(ATTR_DEVICE, ATTR_DEVICE_INDEX); \ + mair |= MAIR_ATTR_SET(ATTR_IWBWA_OWBWA_NTR, \ + ATTR_IWBWA_OWBWA_NTR_INDEX); \ + mair |= MAIR_ATTR_SET(ATTR_NON_CACHEABLE, \ + ATTR_NON_CACHEABLE_INDEX); \ + write_mair_el##_el(mair); \ + \ + /* Invalidate TLBs at the current exception level */ \ + _tlbi_fct(); \ + \ + /* Set TCR bits as well. */ \ + /* Set T0SZ to (64 - width of virtual address space) */ \ + if (flags & XLAT_TABLE_NC) { \ + /* Inner & outer non-cacheable non-shareable. */\ + tcr = TCR_SH_NON_SHAREABLE | \ + TCR_RGN_OUTER_NC | TCR_RGN_INNER_NC | \ + (64 - __builtin_ctzll(PLAT_VIRT_ADDR_SPACE_SIZE));\ + } else { \ + /* Inner & outer WBWA & shareable. */ \ + tcr = TCR_SH_INNER_SHAREABLE | \ + TCR_RGN_OUTER_WBA | TCR_RGN_INNER_WBA | \ + (64 - __builtin_ctzll(PLAT_VIRT_ADDR_SPACE_SIZE));\ + } \ + tcr |= _tcr_extra; \ + write_tcr_el##_el(tcr); \ + \ + /* Set TTBR bits as well */ \ + ttbr = (uint64_t) base_xlation_table; \ + write_ttbr0_el##_el(ttbr); \ + \ + /* Ensure all translation table writes have drained */ \ + /* into memory, the TLB invalidation is complete, */ \ + /* and translation register writes are committed */ \ + /* before enabling the MMU */ \ + dsbish(); \ + isb(); \ + \ + sctlr = read_sctlr_el##_el(); \ + sctlr |= SCTLR_WXN_BIT | SCTLR_M_BIT; \ + \ + if (flags & DISABLE_DCACHE) \ + sctlr &= ~SCTLR_C_BIT; \ + else \ + sctlr |= SCTLR_C_BIT; \ + \ + write_sctlr_el##_el(sctlr); \ + \ + /* Ensure the MMU enable takes effect immediately */ \ + isb(); \ + } + +/* Define EL1 and EL3 variants of the function enabling the MMU */ +DEFINE_ENABLE_MMU_EL(1, + /* + * TCR_EL1.EPD1: Disable translation table walk for addresses + * that are translated using TTBR1_EL1. + */ + TCR_EPD1_BIT | (tcr_ps_bits << TCR_EL1_IPS_SHIFT), + tlbivmalle1) +DEFINE_ENABLE_MMU_EL(3, + TCR_EL3_RES1 | (tcr_ps_bits << TCR_EL3_PS_SHIFT), + tlbialle3) diff --git a/lib/xlat_tables/xlat_tables_common.c b/lib/xlat_tables/xlat_tables_common.c new file mode 100644 index 00000000..c6fa10ee --- /dev/null +++ b/lib/xlat_tables/xlat_tables_common.c @@ -0,0 +1,388 @@ +/* + * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch.h> +#include <arch_helpers.h> +#include <assert.h> +#include <cassert.h> +#include <common_def.h> +#include <debug.h> +#include <platform_def.h> +#include <string.h> +#include <types.h> +#include <utils.h> +#include <xlat_tables.h> +#include "xlat_tables_private.h" + +#if LOG_LEVEL >= LOG_LEVEL_VERBOSE +#define LVL0_SPACER "" +#define LVL1_SPACER " " +#define LVL2_SPACER " " +#define LVL3_SPACER " " +#define get_level_spacer(level) \ + (((level) == U(0)) ? LVL0_SPACER : \ + (((level) == U(1)) ? LVL1_SPACER : \ + (((level) == U(2)) ? LVL2_SPACER : LVL3_SPACER))) +#define debug_print(...) tf_printf(__VA_ARGS__) +#else +#define debug_print(...) 
((void)0) +#endif + +#define UNSET_DESC ~0ull + +static uint64_t xlat_tables[MAX_XLAT_TABLES][XLAT_TABLE_ENTRIES] + __aligned(XLAT_TABLE_SIZE) __section("xlat_table"); + +static unsigned int next_xlat; +static unsigned long long xlat_max_pa; +static uintptr_t xlat_max_va; + +static uint64_t execute_never_mask; + +/* + * Array of all memory regions stored in order of ascending base address. + * The list is terminated by the first entry with size == 0. + */ +static mmap_region_t mmap[MAX_MMAP_REGIONS + 1]; + + +void print_mmap(void) +{ +#if LOG_LEVEL >= LOG_LEVEL_VERBOSE + debug_print("mmap:\n"); + mmap_region_t *mm = mmap; + while (mm->size) { + debug_print(" VA:%p PA:0x%llx size:0x%zx attr:0x%x\n", + (void *)mm->base_va, mm->base_pa, + mm->size, mm->attr); + ++mm; + }; + debug_print("\n"); +#endif +} + +void mmap_add_region(unsigned long long base_pa, uintptr_t base_va, + size_t size, mmap_attr_t attr) +{ + mmap_region_t *mm = mmap; + mmap_region_t *mm_last = mm + ARRAY_SIZE(mmap) - 1; + unsigned long long end_pa = base_pa + size - 1; + uintptr_t end_va = base_va + size - 1; + + assert(IS_PAGE_ALIGNED(base_pa)); + assert(IS_PAGE_ALIGNED(base_va)); + assert(IS_PAGE_ALIGNED(size)); + + if (!size) + return; + + assert(base_pa < end_pa); /* Check for overflows */ + assert(base_va < end_va); + + assert((base_va + (uintptr_t)size - (uintptr_t)1) <= + (PLAT_VIRT_ADDR_SPACE_SIZE - 1)); + assert((base_pa + (unsigned long long)size - 1ULL) <= + (PLAT_PHY_ADDR_SPACE_SIZE - 1)); + +#if ENABLE_ASSERTIONS + + /* Check for PAs and VAs overlaps with all other regions */ + for (mm = mmap; mm->size; ++mm) { + + uintptr_t mm_end_va = mm->base_va + mm->size - 1; + + /* + * Check if one of the regions is completely inside the other + * one. + */ + int fully_overlapped_va = + ((base_va >= mm->base_va) && (end_va <= mm_end_va)) || + ((mm->base_va >= base_va) && (mm_end_va <= end_va)); + + /* + * Full VA overlaps are only allowed if both regions are + * identity mapped (zero offset) or have the same VA to PA + * offset. Also, make sure that it's not the exact same area. + */ + if (fully_overlapped_va) { + assert((mm->base_va - mm->base_pa) == + (base_va - base_pa)); + assert((base_va != mm->base_va) || (size != mm->size)); + } else { + /* + * If the regions do not have fully overlapping VAs, + * then they must have fully separated VAs and PAs. + * Partial overlaps are not allowed + */ + + unsigned long long mm_end_pa = + mm->base_pa + mm->size - 1; + + int separated_pa = + (end_pa < mm->base_pa) || (base_pa > mm_end_pa); + int separated_va = + (end_va < mm->base_va) || (base_va > mm_end_va); + + assert(separated_va && separated_pa); + } + } + + mm = mmap; /* Restore pointer to the start of the array */ + +#endif /* ENABLE_ASSERTIONS */ + + /* Find correct place in mmap to insert new region */ + while (mm->base_va < base_va && mm->size) + ++mm; + + /* + * If a section is contained inside another one with the same base + * address, it must be placed after the one it is contained in: + * + * 1st |-----------------------| + * 2nd |------------| + * 3rd |------| + * + * This is required for mmap_region_attr() to get the attributes of the + * small region correctly. 
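As a concrete illustration of the ordering just described (hypothetical identity mappings; the comparison loop that follows enforces it): adding these three nested regions in any call order always leaves the mmap array sorted largest-first, so mmap_region_attr() meets outer regions before the inner ones they contain.

	/* 256 MiB RW window, a read-only 1 MiB sub-region inside it,
	 * and a 4 KiB device page inside that. All share base 0x0 and a
	 * zero VA-to-PA offset, so the full overlaps are legal. */
	mmap_add_region(0x00000000, 0x00000000, 0x10000000,
			MT_MEMORY | MT_RW | MT_NS);
	mmap_add_region(0x00000000, 0x00000000, 0x00100000,
			MT_MEMORY | MT_NS);
	mmap_add_region(0x00000000, 0x00000000, 0x00001000,
			MT_DEVICE | MT_RW | MT_NS);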
+	 */
+	while ((mm->base_va == base_va) && (mm->size > size))
+		++mm;
+
+	/* Make room for new region by moving other regions up by one place */
+	memmove(mm + 1, mm, (uintptr_t)mm_last - (uintptr_t)mm);
+
+	/* Check we haven't lost the empty sentinel from the end of the array */
+	assert(mm_last->size == 0);
+
+	mm->base_pa = base_pa;
+	mm->base_va = base_va;
+	mm->size = size;
+	mm->attr = attr;
+
+	if (end_pa > xlat_max_pa)
+		xlat_max_pa = end_pa;
+	if (end_va > xlat_max_va)
+		xlat_max_va = end_va;
+}
+
+void mmap_add(const mmap_region_t *mm)
+{
+	while (mm->size) {
+		mmap_add_region(mm->base_pa, mm->base_va, mm->size, mm->attr);
+		++mm;
+	}
+}
+
+static uint64_t mmap_desc(mmap_attr_t attr, unsigned long long addr_pa,
+			  unsigned int level)
+{
+	uint64_t desc;
+	int mem_type;
+
+	/* Make sure that the granularity is fine enough to map this address. */
+	assert((addr_pa & XLAT_BLOCK_MASK(level)) == 0);
+
+	desc = addr_pa;
+	/*
+	 * There are different translation table descriptors for level 3 and the
+	 * rest.
+	 */
+	desc |= (level == XLAT_TABLE_LEVEL_MAX) ? PAGE_DESC : BLOCK_DESC;
+	desc |= (attr & MT_NS) ? LOWER_ATTRS(NS) : 0;
+	desc |= (attr & MT_RW) ? LOWER_ATTRS(AP_RW) : LOWER_ATTRS(AP_RO);
+	desc |= LOWER_ATTRS(ACCESS_FLAG);
+
+	/*
+	 * Deduce shareability domain and executability of the memory region
+	 * from the memory type.
+	 *
+	 * Data accesses to device memory and non-cacheable normal memory are
+	 * coherent for all observers in the system, and correspondingly are
+	 * always treated as being Outer Shareable. Therefore, for these 2 types
+	 * of memory, it is not strictly needed to set the shareability field
+	 * in the translation tables.
+	 */
+	mem_type = MT_TYPE(attr);
+	if (mem_type == MT_DEVICE) {
+		desc |= LOWER_ATTRS(ATTR_DEVICE_INDEX | OSH);
+		/*
+		 * Always map device memory as execute-never.
+		 * This is to avoid the possibility of a speculative instruction
+		 * fetch, which could be an issue if this memory region
+		 * corresponds to a read-sensitive peripheral.
+		 */
+		desc |= execute_never_mask;
+
+	} else { /* Normal memory */
+		/*
+		 * Always map read-write normal memory as execute-never.
+		 * (Trusted Firmware doesn't self-modify its code, therefore
+		 * R/W memory is reserved for data storage, which must not be
+		 * executable.)
+		 * Note that setting the XN bit here is for consistency only.
+		 * The function that enables the MMU sets the SCTLR_ELx.WXN bit,
+		 * which causes any writable memory region to be treated as
+		 * execute-never, regardless of the value of the XN bit in the
+		 * translation table.
+		 *
+		 * For read-only memory, rely on the MT_EXECUTE/MT_EXECUTE_NEVER
+		 * attribute to figure out the value of the XN bit.
+		 */
+		if ((attr & MT_RW) || (attr & MT_EXECUTE_NEVER)) {
+			desc |= execute_never_mask;
+		}
+
+		if (mem_type == MT_MEMORY) {
+			desc |= LOWER_ATTRS(ATTR_IWBWA_OWBWA_NTR_INDEX | ISH);
+		} else {
+			assert(mem_type == MT_NON_CACHEABLE);
+			desc |= LOWER_ATTRS(ATTR_NON_CACHEABLE_INDEX | OSH);
+		}
+	}
+
+	debug_print((mem_type == MT_MEMORY) ? "MEM" :
+		((mem_type == MT_NON_CACHEABLE) ? "NC" : "DEV"));
+	debug_print(attr & MT_RW ? "-RW" : "-RO");
+	debug_print(attr & MT_NS ? "-NS" : "-S");
+	debug_print(attr & MT_EXECUTE_NEVER ? "-XN" : "-EXEC");
+	return desc;
+}
+
+/*
+ * Look for the innermost region that contains the area at `base_va` with size
+ * `size`. Populate *attr with the attributes of this region.
+ *
+ * On success, this function returns 0.
+ * If there are partial overlaps (meaning that a smaller size is needed) or if
+ * the region can't be found in the given area, it returns -1. In this case the
+ * value pointed to by attr should be ignored by the caller.
+ */
+static int mmap_region_attr(mmap_region_t *mm, uintptr_t base_va,
+			    size_t size, mmap_attr_t *attr)
+{
+	/* Don't assume that the area is contained in the first region */
+	int ret = -1;
+
+	/*
+	 * Get attributes from last (innermost) region that contains the
+	 * requested area. Don't stop as soon as one region doesn't contain it
+	 * because there may be other internal regions that contain this area:
+	 *
+	 * |-----------------------------1-----------------------------|
+	 * |----2----|     |-------3-------|    |----5----|
+	 *                   |--4--|
+	 *
+	 *                   |---| <- Area we want the attributes of.
+	 *
+	 * In this example, the area is contained in regions 1, 3 and 4 but not
+	 * in region 2. The loop shouldn't stop at region 2 as inner regions
+	 * have priority over outer regions, it should stop at region 5.
+	 */
+	for (;; ++mm) {
+
+		if (!mm->size)
+			return ret; /* Reached end of list */
+
+		if (mm->base_va > base_va + size - 1)
+			return ret; /* Next region is after area so end */
+
+		if (mm->base_va + mm->size - 1 < base_va)
+			continue; /* Next region ends before the area so skip it */
+
+		if (!ret && mm->attr == *attr)
+			continue; /* Region doesn't override attribs so skip */
+
+		if (mm->base_va > base_va ||
+			mm->base_va + mm->size - 1 < base_va + size - 1)
+			return -1; /* Region doesn't fully cover our area */
+
+		*attr = mm->attr;
+		ret = 0;
+	}
+	return ret;
+}
+
+static mmap_region_t *init_xlation_table_inner(mmap_region_t *mm,
+					       uintptr_t base_va,
+					       uint64_t *table,
+					       unsigned int level)
+{
+	assert(level >= XLAT_TABLE_LEVEL_MIN && level <= XLAT_TABLE_LEVEL_MAX);
+
+	unsigned int level_size_shift =
+		L0_XLAT_ADDRESS_SHIFT - level * XLAT_TABLE_ENTRIES_SHIFT;
+	u_register_t level_size = (u_register_t)1 << level_size_shift;
+	u_register_t level_index_mask =
+		((u_register_t)XLAT_TABLE_ENTRIES_MASK) << level_size_shift;
+
+	debug_print("New xlat table:\n");
+
+	do {
+		uint64_t desc = UNSET_DESC;
+
+		if (!mm->size) {
+			/* Done mapping regions; finish zeroing the table */
+			desc = INVALID_DESC;
+		} else if (mm->base_va + mm->size - 1 < base_va) {
+			/* This area is after the region so get next region */
+			++mm;
+			continue;
+		}
+
+		debug_print("%s VA:%p size:0x%llx ", get_level_spacer(level),
+			(void *)base_va, (unsigned long long)level_size);
+
+		if (mm->base_va > base_va + level_size - 1) {
+			/* Next region is after this area. Nothing to map yet */
+			desc = INVALID_DESC;
+			/* Make sure that the current level allows block descriptors */
+		} else if (level >= XLAT_BLOCK_LEVEL_MIN) {
+			/*
+			 * Try to get attributes of this area. It will fail if
+			 * there are partially overlapping regions. On success,
+			 * it will return the innermost region's attributes.
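A worked trace of that attribute lookup, using made-up regions, shows why the innermost entry wins:

	/*
	 * Hypothetical walk for a 4 KiB area at VA 0x1000 with two regions
	 * (array already sorted as mmap_add_region() guarantees):
	 *
	 *   region A: base_va 0x0,    size 0x10000000, MT_MEMORY | MT_RW
	 *   region B: base_va 0x1000, size 0x1000,     MT_MEMORY (read-only)
	 *
	 * Iteration 1 (A): A fully covers the area -> *attr = A.attr, ret = 0.
	 * Iteration 2 (B): B also fully covers it  -> *attr = B.attr.
	 * End of list     -> return 0, with the read-only attributes in *attr,
	 * so mmap_desc() emits a read-only descriptor for that page.
	 */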
+ */ + mmap_attr_t attr; + int r = mmap_region_attr(mm, base_va, level_size, &attr); + + if (!r) { + desc = mmap_desc(attr, + base_va - mm->base_va + mm->base_pa, + level); + } + } + + if (desc == UNSET_DESC) { + /* Area not covered by a region so need finer table */ + uint64_t *new_table = xlat_tables[next_xlat++]; + assert(next_xlat <= MAX_XLAT_TABLES); + desc = TABLE_DESC | (uintptr_t)new_table; + + /* Recurse to fill in new table */ + mm = init_xlation_table_inner(mm, base_va, + new_table, level+1); + } + + debug_print("\n"); + + *table++ = desc; + base_va += level_size; + } while ((base_va & level_index_mask) && + (base_va - 1 < PLAT_VIRT_ADDR_SPACE_SIZE - 1)); + + return mm; +} + +void init_xlation_table(uintptr_t base_va, uint64_t *table, + unsigned int level, uintptr_t *max_va, + unsigned long long *max_pa) +{ + execute_never_mask = xlat_arch_get_xn_desc(xlat_arch_current_el()); + init_xlation_table_inner(mmap, base_va, table, level); + *max_va = xlat_max_va; + *max_pa = xlat_max_pa; +} diff --git a/lib/xlat_tables/xlat_tables_private.h b/lib/xlat_tables/xlat_tables_private.h new file mode 100644 index 00000000..50d6bd59 --- /dev/null +++ b/lib/xlat_tables/xlat_tables_private.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __XLAT_TABLES_PRIVATE_H__ +#define __XLAT_TABLES_PRIVATE_H__ + +#include <cassert.h> +#include <platform_def.h> +#include <xlat_tables_arch.h> + +/* + * If the platform hasn't defined a physical and a virtual address space size + * default to ADDR_SPACE_SIZE. + */ +#if ERROR_DEPRECATED +# ifdef ADDR_SPACE_SIZE +# error "ADDR_SPACE_SIZE is deprecated. Use PLAT_xxx_ADDR_SPACE_SIZE instead." +# endif +#elif defined(ADDR_SPACE_SIZE) +# ifndef PLAT_PHY_ADDR_SPACE_SIZE +# define PLAT_PHY_ADDR_SPACE_SIZE ADDR_SPACE_SIZE +# endif +# ifndef PLAT_VIRT_ADDR_SPACE_SIZE +# define PLAT_VIRT_ADDR_SPACE_SIZE ADDR_SPACE_SIZE +# endif +#endif + +CASSERT(CHECK_VIRT_ADDR_SPACE_SIZE(PLAT_VIRT_ADDR_SPACE_SIZE), + assert_valid_virt_addr_space_size); + +CASSERT(CHECK_PHY_ADDR_SPACE_SIZE(PLAT_PHY_ADDR_SPACE_SIZE), + assert_valid_phy_addr_space_size); + +/* Alias to retain compatibility with the old #define name */ +#define XLAT_BLOCK_LEVEL_MIN MIN_LVL_BLOCK_DESC + +void print_mmap(void); + +/* Returns the current Exception Level. The returned EL must be 1 or higher. */ +int xlat_arch_current_el(void); + +/* + * Returns the bit mask that has to be ORed to the rest of a translation table + * descriptor so that execution of code is prohibited at the given Exception + * Level. + */ +uint64_t xlat_arch_get_xn_desc(int el); + +void init_xlation_table(uintptr_t base_va, uint64_t *table, + unsigned int level, uintptr_t *max_va, + unsigned long long *max_pa); + +#endif /* __XLAT_TABLES_PRIVATE_H__ */ diff --git a/lib/xlat_tables_v2/aarch32/xlat_tables_arch.c b/lib/xlat_tables_v2/aarch32/xlat_tables_arch.c new file mode 100644 index 00000000..642f799a --- /dev/null +++ b/lib/xlat_tables_v2/aarch32/xlat_tables_arch.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch.h> +#include <arch_helpers.h> +#include <assert.h> +#include <cassert.h> +#include <platform_def.h> +#include <utils.h> +#include <utils_def.h> +#include <xlat_tables_v2.h> +#include "../xlat_tables_private.h" + +#if ENABLE_ASSERTIONS +unsigned long long xlat_arch_get_max_supported_pa(void) +{ + /* Physical address space size for long descriptor format. */ + return (1ull << 40) - 1ull; +} +#endif /* ENABLE_ASSERTIONS*/ + +int is_mmu_enabled_ctx(const xlat_ctx_t *ctx __unused) +{ + return (read_sctlr() & SCTLR_M_BIT) != 0; +} + +void xlat_arch_tlbi_va(uintptr_t va) +{ + /* + * Ensure the translation table write has drained into memory before + * invalidating the TLB entry. + */ + dsbishst(); + + tlbimvaais(TLBI_ADDR(va)); +} + +void xlat_arch_tlbi_va_regime(uintptr_t va, xlat_regime_t xlat_regime __unused) +{ + /* + * Ensure the translation table write has drained into memory before + * invalidating the TLB entry. + */ + dsbishst(); + + tlbimvaais(TLBI_ADDR(va)); +} + +void xlat_arch_tlbi_va_sync(void) +{ + /* Invalidate all entries from branch predictors. */ + bpiallis(); + + /* + * A TLB maintenance instruction can complete at any time after + * it is issued, but is only guaranteed to be complete after the + * execution of DSB by the PE that executed the TLB maintenance + * instruction. After the TLB invalidate instruction is + * complete, no new memory accesses using the invalidated TLB + * entries will be observed by any observer of the system + * domain. See section D4.8.2 of the ARMv8 (issue k), paragraph + * "Ordering and completion of TLB maintenance instructions". + */ + dsbish(); + + /* + * The effects of a completed TLB maintenance instruction are + * only guaranteed to be visible on the PE that executed the + * instruction after the execution of an ISB instruction by the + * PE that executed the TLB maintenance instruction. + */ + isb(); +} + +int xlat_arch_current_el(void) +{ + /* + * If EL3 is in AArch32 mode, all secure PL1 modes (Monitor, System, + * SVC, Abort, UND, IRQ and FIQ modes) execute at EL3. + */ + return 3; +} + +/******************************************************************************* + * Function for enabling the MMU in Secure PL1, assuming that the page tables + * have already been created. + ******************************************************************************/ +void enable_mmu_arch(unsigned int flags, + uint64_t *base_table, + unsigned long long max_pa, + uintptr_t max_va) +{ + u_register_t mair0, ttbcr, sctlr; + uint64_t ttbr0; + + assert(IS_IN_SECURE()); + + sctlr = read_sctlr(); + assert((sctlr & SCTLR_M_BIT) == 0); + + /* Invalidate TLBs at the current exception level */ + tlbiall(); + + /* Set attributes in the right indices of the MAIR */ + mair0 = MAIR0_ATTR_SET(ATTR_DEVICE, ATTR_DEVICE_INDEX); + mair0 |= MAIR0_ATTR_SET(ATTR_IWBWA_OWBWA_NTR, + ATTR_IWBWA_OWBWA_NTR_INDEX); + mair0 |= MAIR0_ATTR_SET(ATTR_NON_CACHEABLE, + ATTR_NON_CACHEABLE_INDEX); + + /* + * Configure the control register for stage 1 of the PL1&0 translation + * regime. + */ + + /* Use the Long-descriptor translation table format. */ + ttbcr = TTBCR_EAE_BIT; + + /* + * Disable translation table walk for addresses that are translated + * using TTBR1. Therefore, only TTBR0 is used. + */ + ttbcr |= TTBCR_EPD1_BIT; + + /* + * Limit the input address ranges and memory region sizes translated + * using TTBR0 to the given virtual address space size, if smaller than + * 32 bits. 
+ */ + if (max_va != UINT32_MAX) { + uintptr_t virtual_addr_space_size = max_va + 1; + assert(CHECK_VIRT_ADDR_SPACE_SIZE(virtual_addr_space_size)); + /* + * __builtin_ctzll(0) is undefined but here we are guaranteed + * that virtual_addr_space_size is in the range [1, UINT32_MAX]. + */ + ttbcr |= 32 - __builtin_ctzll(virtual_addr_space_size); + } + + /* + * Set the cacheability and shareability attributes for memory + * associated with translation table walks using TTBR0. + */ + if (flags & XLAT_TABLE_NC) { + /* Inner & outer non-cacheable non-shareable. */ + ttbcr |= TTBCR_SH0_NON_SHAREABLE | TTBCR_RGN0_OUTER_NC | + TTBCR_RGN0_INNER_NC; + } else { + /* Inner & outer WBWA & shareable. */ + ttbcr |= TTBCR_SH0_INNER_SHAREABLE | TTBCR_RGN0_OUTER_WBA | + TTBCR_RGN0_INNER_WBA; + } + + /* Set TTBR0 bits as well */ + ttbr0 = (uint64_t)(uintptr_t) base_table; +#if ARM_ARCH_AT_LEAST(8, 2) + /* + * Enable CnP bit so as to share page tables with all PEs. + * Mandatory for ARMv8.2 implementations. + */ + ttbr0 |= TTBR_CNP_BIT; +#endif + + /* Now program the relevant system registers */ + write_mair0(mair0); + write_ttbcr(ttbcr); + write64_ttbr0(ttbr0); + write64_ttbr1(0); + + /* + * Ensure all translation table writes have drained + * into memory, the TLB invalidation is complete, + * and translation register writes are committed + * before enabling the MMU + */ + dsbish(); + isb(); + + sctlr |= SCTLR_WXN_BIT | SCTLR_M_BIT; + + if (flags & DISABLE_DCACHE) + sctlr &= ~SCTLR_C_BIT; + else + sctlr |= SCTLR_C_BIT; + + write_sctlr(sctlr); + + /* Ensure the MMU enable takes effect immediately */ + isb(); +} diff --git a/lib/xlat_tables_v2/aarch32/xlat_tables_arch_private.h b/lib/xlat_tables_v2/aarch32/xlat_tables_arch_private.h new file mode 100644 index 00000000..509395d8 --- /dev/null +++ b/lib/xlat_tables_v2/aarch32/xlat_tables_arch_private.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __XLAT_TABLES_ARCH_PRIVATE_H__ +#define __XLAT_TABLES_ARCH_PRIVATE_H__ + +#include <xlat_tables_defs.h> +#include <xlat_tables_v2.h> + +/* + * Return the execute-never mask that will prevent instruction fetch at the + * given translation regime. + */ +static inline uint64_t xlat_arch_regime_get_xn_desc(xlat_regime_t regime __unused) +{ + return UPPER_ATTRS(XN); +} + +#endif /* __XLAT_TABLES_ARCH_PRIVATE_H__ */ diff --git a/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c b/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c new file mode 100644 index 00000000..eda38d34 --- /dev/null +++ b/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. 
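The split between xlat_arch_tlbi_va() and xlat_arch_tlbi_va_sync() above lets a caller batch several invalidations and pay for the DSB/ISB barriers only once. A plausible usage pattern (hypothetical caller, assuming the usual PAGE_SIZE definition):

	/* Invalidate a freshly remapped range page by page, then
	 * synchronise once; each TLBI is issued eagerly, the barriers
	 * are shared across the whole batch. */
	static void remap_flush(uintptr_t base, size_t size)
	{
		for (uintptr_t va = base; va < base + size; va += PAGE_SIZE)
			xlat_arch_tlbi_va(va);

		xlat_arch_tlbi_va_sync();  /* walks now see the new entries */
	}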
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch.h> +#include <arch_helpers.h> +#include <assert.h> +#include <bl_common.h> +#include <cassert.h> +#include <common_def.h> +#include <sys/types.h> +#include <utils.h> +#include <utils_def.h> +#include <xlat_tables_v2.h> +#include "../xlat_tables_private.h" + +static unsigned long long calc_physical_addr_size_bits( + unsigned long long max_addr) +{ + /* Physical address can't exceed 48 bits */ + assert((max_addr & ADDR_MASK_48_TO_63) == 0); + + /* 48 bits address */ + if (max_addr & ADDR_MASK_44_TO_47) + return TCR_PS_BITS_256TB; + + /* 44 bits address */ + if (max_addr & ADDR_MASK_42_TO_43) + return TCR_PS_BITS_16TB; + + /* 42 bits address */ + if (max_addr & ADDR_MASK_40_TO_41) + return TCR_PS_BITS_4TB; + + /* 40 bits address */ + if (max_addr & ADDR_MASK_36_TO_39) + return TCR_PS_BITS_1TB; + + /* 36 bits address */ + if (max_addr & ADDR_MASK_32_TO_35) + return TCR_PS_BITS_64GB; + + return TCR_PS_BITS_4GB; +} + +#if ENABLE_ASSERTIONS +/* Physical Address ranges supported in the AArch64 Memory Model */ +static const unsigned int pa_range_bits_arr[] = { + PARANGE_0000, PARANGE_0001, PARANGE_0010, PARANGE_0011, PARANGE_0100, + PARANGE_0101 +}; + +unsigned long long xlat_arch_get_max_supported_pa(void) +{ + u_register_t pa_range = read_id_aa64mmfr0_el1() & + ID_AA64MMFR0_EL1_PARANGE_MASK; + + /* All other values are reserved */ + assert(pa_range < ARRAY_SIZE(pa_range_bits_arr)); + + return (1ull << pa_range_bits_arr[pa_range]) - 1ull; +} +#endif /* ENABLE_ASSERTIONS*/ + +int is_mmu_enabled_ctx(const xlat_ctx_t *ctx) +{ + if (ctx->xlat_regime == EL1_EL0_REGIME) { + assert(xlat_arch_current_el() >= 1); + return (read_sctlr_el1() & SCTLR_M_BIT) != 0; + } else { + assert(ctx->xlat_regime == EL3_REGIME); + assert(xlat_arch_current_el() >= 3); + return (read_sctlr_el3() & SCTLR_M_BIT) != 0; + } +} + + +void xlat_arch_tlbi_va(uintptr_t va) +{ +#if IMAGE_EL == 1 + assert(IS_IN_EL(1)); + xlat_arch_tlbi_va_regime(va, EL1_EL0_REGIME); +#elif IMAGE_EL == 3 + assert(IS_IN_EL(3)); + xlat_arch_tlbi_va_regime(va, EL3_REGIME); +#endif +} + +void xlat_arch_tlbi_va_regime(uintptr_t va, xlat_regime_t xlat_regime) +{ + /* + * Ensure the translation table write has drained into memory before + * invalidating the TLB entry. + */ + dsbishst(); + + /* + * This function only supports invalidation of TLB entries for the EL3 + * and EL1&0 translation regimes. + * + * Also, it is architecturally UNDEFINED to invalidate TLBs of a higher + * exception level (see section D4.9.2 of the ARM ARM rev B.a). + */ + if (xlat_regime == EL1_EL0_REGIME) { + assert(xlat_arch_current_el() >= 1); + tlbivaae1is(TLBI_ADDR(va)); + } else { + assert(xlat_regime == EL3_REGIME); + assert(xlat_arch_current_el() >= 3); + tlbivae3is(TLBI_ADDR(va)); + } +} + +void xlat_arch_tlbi_va_sync(void) +{ + /* + * A TLB maintenance instruction can complete at any time after + * it is issued, but is only guaranteed to be complete after the + * execution of DSB by the PE that executed the TLB maintenance + * instruction. After the TLB invalidate instruction is + * complete, no new memory accesses using the invalidated TLB + * entries will be observed by any observer of the system + * domain. See section D4.8.2 of the ARMv8 (issue k), paragraph + * "Ordering and completion of TLB maintenance instructions". 
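Returning to calc_physical_addr_size_bits() above: it picks the smallest TCR PS/IPS encoding whose range still covers the highest physical address in use. Worked examples (illustrative only; the function is static, so such checks would have to live in the same file):

	static void pa_size_bits_examples(void)
	{
		/* A 32-bit maximum address fits the smallest, 4 GiB, encoding. */
		assert(calc_physical_addr_size_bits(0xFFFFFFFFULL) ==
		       TCR_PS_BITS_4GB);
		/* Bit 35 set -> 36-bit range -> 64 GiB encoding. */
		assert(calc_physical_addr_size_bits(0xFFFFFFFFFULL) ==
		       TCR_PS_BITS_64GB);
		/* Bit 39 set -> 40-bit range -> 1 TiB encoding. */
		assert(calc_physical_addr_size_bits(0xFFFFFFFFFFULL) ==
		       TCR_PS_BITS_1TB);
		/* Bit 47 set -> maximum 48-bit range -> 256 TiB encoding. */
		assert(calc_physical_addr_size_bits(0xFFFFFFFFFFFFULL) ==
		       TCR_PS_BITS_256TB);
	}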
+ */ + dsbish(); + + /* + * The effects of a completed TLB maintenance instruction are + * only guaranteed to be visible on the PE that executed the + * instruction after the execution of an ISB instruction by the + * PE that executed the TLB maintenance instruction. + */ + isb(); +} + +int xlat_arch_current_el(void) +{ + int el = GET_EL(read_CurrentEl()); + + assert(el > 0); + + return el; +} + +/******************************************************************************* + * Macro generating the code for the function enabling the MMU in the given + * exception level, assuming that the pagetables have already been created. + * + * _el: Exception level at which the function will run + * _tlbi_fct: Function to invalidate the TLBs at the current + * exception level + ******************************************************************************/ +#define DEFINE_ENABLE_MMU_EL(_el, _tlbi_fct) \ + static void enable_mmu_internal_el##_el(int flags, \ + uint64_t mair, \ + uint64_t tcr, \ + uint64_t ttbr) \ + { \ + uint32_t sctlr = read_sctlr_el##_el(); \ + assert((sctlr & SCTLR_M_BIT) == 0); \ + \ + /* Invalidate TLBs at the current exception level */ \ + _tlbi_fct(); \ + \ + write_mair_el##_el(mair); \ + write_tcr_el##_el(tcr); \ + \ + /* Set TTBR bits as well */ \ + if (ARM_ARCH_AT_LEAST(8, 2)) { \ + /* Enable CnP bit so as to share page tables */ \ + /* with all PEs. This is mandatory for */ \ + /* ARMv8.2 implementations. */ \ + ttbr |= TTBR_CNP_BIT; \ + } \ + write_ttbr0_el##_el(ttbr); \ + \ + /* Ensure all translation table writes have drained */ \ + /* into memory, the TLB invalidation is complete, */ \ + /* and translation register writes are committed */ \ + /* before enabling the MMU */ \ + dsbish(); \ + isb(); \ + \ + sctlr |= SCTLR_WXN_BIT | SCTLR_M_BIT; \ + if (flags & DISABLE_DCACHE) \ + sctlr &= ~SCTLR_C_BIT; \ + else \ + sctlr |= SCTLR_C_BIT; \ + \ + write_sctlr_el##_el(sctlr); \ + \ + /* Ensure the MMU enable takes effect immediately */ \ + isb(); \ + } + +/* Define EL1 and EL3 variants of the function enabling the MMU */ +#if IMAGE_EL == 1 +DEFINE_ENABLE_MMU_EL(1, tlbivmalle1) +#elif IMAGE_EL == 3 +DEFINE_ENABLE_MMU_EL(3, tlbialle3) +#endif + +void enable_mmu_arch(unsigned int flags, + uint64_t *base_table, + unsigned long long max_pa, + uintptr_t max_va) +{ + uint64_t mair, ttbr, tcr; + + /* Set attributes in the right indices of the MAIR. */ + mair = MAIR_ATTR_SET(ATTR_DEVICE, ATTR_DEVICE_INDEX); + mair |= MAIR_ATTR_SET(ATTR_IWBWA_OWBWA_NTR, ATTR_IWBWA_OWBWA_NTR_INDEX); + mair |= MAIR_ATTR_SET(ATTR_NON_CACHEABLE, ATTR_NON_CACHEABLE_INDEX); + + ttbr = (uint64_t) base_table; + + /* + * Set TCR bits as well. + */ + + /* + * Limit the input address ranges and memory region sizes translated + * using TTBR0 to the given virtual address space size. + */ + assert(max_va < UINTPTR_MAX); + uintptr_t virtual_addr_space_size = max_va + 1; + assert(CHECK_VIRT_ADDR_SPACE_SIZE(virtual_addr_space_size)); + /* + * __builtin_ctzll(0) is undefined but here we are guaranteed that + * virtual_addr_space_size is in the range [1,UINTPTR_MAX]. + */ + tcr = 64 - __builtin_ctzll(virtual_addr_space_size); + + /* + * Set the cacheability and shareability attributes for memory + * associated with translation table walks. + */ + if (flags & XLAT_TABLE_NC) { + /* Inner & outer non-cacheable non-shareable. */ + tcr |= TCR_SH_NON_SHAREABLE | + TCR_RGN_OUTER_NC | TCR_RGN_INNER_NC; + } else { + /* Inner & outer WBWA & shareable. 
*/ + tcr |= TCR_SH_INNER_SHAREABLE | + TCR_RGN_OUTER_WBA | TCR_RGN_INNER_WBA; + } + + /* + * It is safer to restrict the max physical address accessible by the + * hardware as much as possible. + */ + unsigned long long tcr_ps_bits = calc_physical_addr_size_bits(max_pa); + +#if IMAGE_EL == 1 + assert(IS_IN_EL(1)); + /* + * TCR_EL1.EPD1: Disable translation table walk for addresses that are + * translated using TTBR1_EL1. + */ + tcr |= TCR_EPD1_BIT | (tcr_ps_bits << TCR_EL1_IPS_SHIFT); + enable_mmu_internal_el1(flags, mair, tcr, ttbr); +#elif IMAGE_EL == 3 + assert(IS_IN_EL(3)); + tcr |= TCR_EL3_RES1 | (tcr_ps_bits << TCR_EL3_PS_SHIFT); + enable_mmu_internal_el3(flags, mair, tcr, ttbr); +#endif +} diff --git a/lib/xlat_tables_v2/aarch64/xlat_tables_arch_private.h b/lib/xlat_tables_v2/aarch64/xlat_tables_arch_private.h new file mode 100644 index 00000000..d201590a --- /dev/null +++ b/lib/xlat_tables_v2/aarch64/xlat_tables_arch_private.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#ifndef __XLAT_TABLES_ARCH_PRIVATE_H__ +#define __XLAT_TABLES_ARCH_PRIVATE_H__ + +#include <assert.h> +#include <xlat_tables_defs.h> +#include <xlat_tables_v2.h> + +/* + * Return the execute-never mask that will prevent instruction fetch at all ELs + * that are part of the given translation regime. + */ +static inline uint64_t xlat_arch_regime_get_xn_desc(xlat_regime_t regime) +{ + if (regime == EL1_EL0_REGIME) { + return UPPER_ATTRS(UXN) | UPPER_ATTRS(PXN); + } else { + assert(regime == EL3_REGIME); + return UPPER_ATTRS(XN); + } +} + +#endif /* __XLAT_TABLES_ARCH_PRIVATE_H__ */ diff --git a/lib/xlat_tables_v2/xlat_tables.mk b/lib/xlat_tables_v2/xlat_tables.mk new file mode 100644 index 00000000..06dd844a --- /dev/null +++ b/lib/xlat_tables_v2/xlat_tables.mk @@ -0,0 +1,11 @@ +# +# Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +# + +XLAT_TABLES_LIB_SRCS := $(addprefix lib/xlat_tables_v2/, \ + ${ARCH}/xlat_tables_arch.c \ + xlat_tables_internal.c) + +INCLUDES += -Ilib/xlat_tables_v2/${ARCH} diff --git a/lib/xlat_tables_v2/xlat_tables_internal.c b/lib/xlat_tables_v2/xlat_tables_internal.c new file mode 100644 index 00000000..0acfacbf --- /dev/null +++ b/lib/xlat_tables_v2/xlat_tables_internal.c @@ -0,0 +1,1662 @@ +/* + * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <arch.h> +#include <arch_helpers.h> +#include <assert.h> +#include <common_def.h> +#include <debug.h> +#include <errno.h> +#include <platform_def.h> +#include <string.h> +#include <types.h> +#include <utils.h> +#include <xlat_tables_arch_private.h> +#include <xlat_tables_defs.h> +#include <xlat_tables_v2.h> + +#include "xlat_tables_private.h" + +/* + * Each platform can define the size of its physical and virtual address spaces. + * If the platform hasn't defined one or both of them, default to + * ADDR_SPACE_SIZE. The latter is deprecated, though. + */ +#if ERROR_DEPRECATED +# ifdef ADDR_SPACE_SIZE +# error "ADDR_SPACE_SIZE is deprecated. Use PLAT_xxx_ADDR_SPACE_SIZE instead." 
+# endif
+#elif defined(ADDR_SPACE_SIZE)
+# ifndef PLAT_PHY_ADDR_SPACE_SIZE
+#  define PLAT_PHY_ADDR_SPACE_SIZE ADDR_SPACE_SIZE
+# endif
+# ifndef PLAT_VIRT_ADDR_SPACE_SIZE
+#  define PLAT_VIRT_ADDR_SPACE_SIZE ADDR_SPACE_SIZE
+# endif
+#endif
+
+/*
+ * Allocate and initialise the default translation context for the BL image
+ * currently executing.
+ */
+REGISTER_XLAT_CONTEXT(tf, MAX_MMAP_REGIONS, MAX_XLAT_TABLES,
+		PLAT_VIRT_ADDR_SPACE_SIZE, PLAT_PHY_ADDR_SPACE_SIZE);
+
+#if PLAT_XLAT_TABLES_DYNAMIC
+
+/*
+ * The following functions assume that they will be called on subtables only.
+ * The base table can't be unmapped, so there is no need to do any special
+ * handling for it.
+ */
+
+/*
+ * Returns the index of the array corresponding to the specified translation
+ * table.
+ */
+static int xlat_table_get_index(xlat_ctx_t *ctx, const uint64_t *table)
+{
+	for (unsigned int i = 0; i < ctx->tables_num; i++)
+		if (ctx->tables[i] == table)
+			return i;
+
+	/*
+	 * Maybe we were asked to get the index of the base level table, which
+	 * should never happen.
+	 */
+	assert(0);
+
+	return -1;
+}
+
+/* Returns a pointer to an empty translation table. */
+static uint64_t *xlat_table_get_empty(xlat_ctx_t *ctx)
+{
+	for (unsigned int i = 0; i < ctx->tables_num; i++)
+		if (ctx->tables_mapped_regions[i] == 0)
+			return ctx->tables[i];
+
+	return NULL;
+}
+
+/* Increments region count for a given table. */
+static void xlat_table_inc_regions_count(xlat_ctx_t *ctx, const uint64_t *table)
+{
+	ctx->tables_mapped_regions[xlat_table_get_index(ctx, table)]++;
+}
+
+/* Decrements region count for a given table. */
+static void xlat_table_dec_regions_count(xlat_ctx_t *ctx, const uint64_t *table)
+{
+	ctx->tables_mapped_regions[xlat_table_get_index(ctx, table)]--;
+}
+
+/* Returns 0 if the specified table isn't empty, otherwise 1. */
+static int xlat_table_is_empty(xlat_ctx_t *ctx, const uint64_t *table)
+{
+	return !ctx->tables_mapped_regions[xlat_table_get_index(ctx, table)];
+}
+
+#else /* PLAT_XLAT_TABLES_DYNAMIC */
+
+/* Returns a pointer to the first empty translation table. */
+static uint64_t *xlat_table_get_empty(xlat_ctx_t *ctx)
+{
+	assert(ctx->next_table < ctx->tables_num);
+
+	return ctx->tables[ctx->next_table++];
+}
+
+#endif /* PLAT_XLAT_TABLES_DYNAMIC */
+
+/*
+ * Returns a block/page table descriptor for the given level and attributes.
+ */
+uint64_t xlat_desc(const xlat_ctx_t *ctx, mmap_attr_t attr,
+		   unsigned long long addr_pa, int level)
+{
+	uint64_t desc;
+	int mem_type;
+
+	/* Make sure that the granularity is fine enough to map this address. */
+	assert((addr_pa & XLAT_BLOCK_MASK(level)) == 0);
+
+	desc = addr_pa;
+	/*
+	 * There are different translation table descriptors for level 3 and the
+	 * rest.
+	 */
+	desc |= (level == XLAT_TABLE_LEVEL_MAX) ? PAGE_DESC : BLOCK_DESC;
+	/*
+	 * Always set the access flag, as TF doesn't manage access flag faults.
+	 * Deduce other fields of the descriptor based on the MT_NS and MT_RW
+	 * memory region attributes.
+	 */
+	desc |= LOWER_ATTRS(ACCESS_FLAG);
+
+	desc |= (attr & MT_NS) ? LOWER_ATTRS(NS) : 0;
+	desc |= (attr & MT_RW) ? LOWER_ATTRS(AP_RW) : LOWER_ATTRS(AP_RO);
+
+	/*
+	 * Do not allow unprivileged access when the mapping is for a privileged
+	 * EL. For translation regimes that do not have mappings for access for
+	 * lower exception levels, set AP[2] to AP_NO_ACCESS_UNPRIVILEGED.
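
A toy model of the subtable bookkeeping above, showing why a table becomes reusable once its mapped-regions counter returns to zero. Array sizes and names are invented; this is a sketch, not the TF-A implementation:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NUM_TABLES 4
    #define ENTRIES    512

    static uint64_t tables[NUM_TABLES][ENTRIES];
    static int mapped_regions[NUM_TABLES];

    static uint64_t *get_empty_table(void)
    {
        for (int i = 0; i < NUM_TABLES; i++)
            if (mapped_regions[i] == 0)
                return tables[i];
        return NULL; /* pool exhausted, like xlat_table_get_empty() */
    }

    static int table_index(const uint64_t *t)
    {
        for (int i = 0; i < NUM_TABLES; i++)
            if (tables[i] == t)
                return i;
        assert(0);
        return -1;
    }

    int main(void)
    {
        uint64_t *t = get_empty_table();
        mapped_regions[table_index(t)]++;   /* a region mapped through t */
        mapped_regions[table_index(t)]--;   /* the region unmapped again */
        printf("table reusable: %d\n", mapped_regions[table_index(t)] == 0);
        return 0;
    }
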
+	 */
+	if (ctx->xlat_regime == EL1_EL0_REGIME) {
+		if (attr & MT_USER) {
+			/* EL0 mapping requested, so we give User access */
+			desc |= LOWER_ATTRS(AP_ACCESS_UNPRIVILEGED);
+		} else {
+			/* EL1 mapping requested, no User access granted */
+			desc |= LOWER_ATTRS(AP_NO_ACCESS_UNPRIVILEGED);
+		}
+	} else {
+		assert(ctx->xlat_regime == EL3_REGIME);
+		desc |= LOWER_ATTRS(AP_NO_ACCESS_UNPRIVILEGED);
+	}
+
+	/*
+	 * Deduce shareability domain and executability of the memory region
+	 * from the memory type of the attributes (MT_TYPE).
+	 *
+	 * Data accesses to device memory and non-cacheable normal memory are
+	 * coherent for all observers in the system, and correspondingly are
+	 * always treated as being Outer Shareable. Therefore, for these 2 types
+	 * of memory, it is not strictly needed to set the shareability field
+	 * in the translation tables.
+	 */
+	mem_type = MT_TYPE(attr);
+	if (mem_type == MT_DEVICE) {
+		desc |= LOWER_ATTRS(ATTR_DEVICE_INDEX | OSH);
+		/*
+		 * Always map device memory as execute-never.
+		 * This is to avoid the possibility of a speculative instruction
+		 * fetch, which could be an issue if this memory region
+		 * corresponds to a read-sensitive peripheral.
+		 */
+		desc |= xlat_arch_regime_get_xn_desc(ctx->xlat_regime);
+
+	} else { /* Normal memory */
+		/*
+		 * Always map read-write normal memory as execute-never.
+		 * (Trusted Firmware doesn't self-modify its code, therefore
+		 * R/W memory is reserved for data storage, which must not be
+		 * executable.)
+		 * Note that setting the XN bit here is for consistency only.
+		 * The function that enables the MMU sets the SCTLR_ELx.WXN bit,
+		 * which causes any writable memory region to be treated as
+		 * execute-never, regardless of the value of the XN bit in the
+		 * translation table.
+		 *
+		 * For read-only memory, rely on the MT_EXECUTE/MT_EXECUTE_NEVER
+		 * attribute to figure out the value of the XN bit. The actual
+		 * XN bit(s) to set in the descriptor depends on the context's
+		 * translation regime and the policy applied in
+		 * xlat_arch_regime_get_xn_desc().
+		 */
+		if ((attr & MT_RW) || (attr & MT_EXECUTE_NEVER)) {
+			desc |= xlat_arch_regime_get_xn_desc(ctx->xlat_regime);
+		}
+
+		if (mem_type == MT_MEMORY) {
+			desc |= LOWER_ATTRS(ATTR_IWBWA_OWBWA_NTR_INDEX | ISH);
+		} else {
+			assert(mem_type == MT_NON_CACHEABLE);
+			desc |= LOWER_ATTRS(ATTR_NON_CACHEABLE_INDEX | OSH);
+		}
+	}
+
+	return desc;
+}
+
+/*
+ * Enumeration of actions that can be taken when mapping table entries depending
+ * on the previous value in that entry and information about the region being
+ * mapped.
+ */
+typedef enum {
+
+	/* Do nothing */
+	ACTION_NONE,
+
+	/* Write a block (or page, if in level 3) entry. */
+	ACTION_WRITE_BLOCK_ENTRY,
+
+	/*
+	 * Create a new table and write a table entry pointing to it. Recurse
+	 * into it for further processing.
+	 */
+	ACTION_CREATE_NEW_TABLE,
+
+	/*
+	 * There is a table descriptor in this entry, read it and recurse into
+	 * that table for further processing.
+	 */
+	ACTION_RECURSE_INTO_TABLE,
+
+} action_t;
+
+#if PLAT_XLAT_TABLES_DYNAMIC
+
+/*
+ * Recursive function that writes to the translation tables and unmaps the
+ * specified region.
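
A reduced model of the attribute-folding policy just described: device memory and writable memory always end up execute-never. The flag values here are placeholders, not the VMSA bit positions:

    #include <stdint.h>
    #include <stdio.h>

    #define F_VALID  (1u << 0)
    #define F_AF     (1u << 1)  /* access flag, always set as in TF */
    #define F_RO     (1u << 2)
    #define F_XN     (1u << 3)
    #define F_DEVICE (1u << 4)

    static uint32_t toy_desc(int rw, int device, int exec_never)
    {
        uint32_t d = F_VALID | F_AF;

        if (!rw)
            d |= F_RO;
        /* Device memory and writable memory are never executable. */
        if (device || rw || exec_never)
            d |= F_XN;
        if (device)
            d |= F_DEVICE;
        return d;
    }

    int main(void)
    {
        printf("RO code: 0x%x\n", toy_desc(0, 0, 0)); /* executable */
        printf("RW data: 0x%x\n", toy_desc(1, 0, 0)); /* XN forced  */
        printf("Device:  0x%x\n", toy_desc(0, 1, 1)); /* XN + device */
        return 0;
    }
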
+ */
+static void xlat_tables_unmap_region(xlat_ctx_t *ctx, mmap_region_t *mm,
+				     const uintptr_t table_base_va,
+				     uint64_t *const table_base,
+				     const int table_entries,
+				     const unsigned int level)
+{
+	assert(level >= ctx->base_level && level <= XLAT_TABLE_LEVEL_MAX);
+
+	uint64_t *subtable;
+	uint64_t desc;
+
+	uintptr_t table_idx_va;
+	uintptr_t table_idx_end_va; /* End VA of this entry */
+
+	uintptr_t region_end_va = mm->base_va + mm->size - 1;
+
+	int table_idx;
+
+	if (mm->base_va > table_base_va) {
+		/* Find the first index of the table affected by the region. */
+		table_idx_va = mm->base_va & ~XLAT_BLOCK_MASK(level);
+
+		table_idx = (table_idx_va - table_base_va) >>
+			XLAT_ADDR_SHIFT(level);
+
+		assert(table_idx < table_entries);
+	} else {
+		/* Start from the beginning of the table. */
+		table_idx_va = table_base_va;
+		table_idx = 0;
+	}
+
+	while (table_idx < table_entries) {
+
+		table_idx_end_va = table_idx_va + XLAT_BLOCK_SIZE(level) - 1;
+
+		desc = table_base[table_idx];
+		uint64_t desc_type = desc & DESC_MASK;
+
+		action_t action = ACTION_NONE;
+
+		if ((mm->base_va <= table_idx_va) &&
+		    (region_end_va >= table_idx_end_va)) {
+
+			/* Region covers the whole block */
+
+			if (level == 3) {
+				/*
+				 * Last level, only page descriptors allowed,
+				 * erase it.
+				 */
+				assert(desc_type == PAGE_DESC);
+
+				action = ACTION_WRITE_BLOCK_ENTRY;
+			} else {
+				/*
+				 * Other levels can have table descriptors. If
+				 * so, recurse into it and erase descriptors
+				 * inside it as needed. If there is a block
+				 * descriptor, just erase it. If an invalid
+				 * descriptor is found, this table isn't
+				 * actually mapped, which shouldn't happen.
+				 */
+				if (desc_type == TABLE_DESC) {
+					action = ACTION_RECURSE_INTO_TABLE;
+				} else {
+					assert(desc_type == BLOCK_DESC);
+					action = ACTION_WRITE_BLOCK_ENTRY;
+				}
+			}
+
+		} else if ((mm->base_va <= table_idx_end_va) ||
+			   (region_end_va >= table_idx_va)) {
+
+			/*
+			 * Region partially covers the block.
+			 *
+			 * It can't happen in level 3.
+			 *
+			 * There must be a table descriptor here, if not there
+			 * was a problem when mapping the region.
+			 */
+
+			assert(level < 3);
+
+			assert(desc_type == TABLE_DESC);
+
+			action = ACTION_RECURSE_INTO_TABLE;
+		}
+
+		if (action == ACTION_WRITE_BLOCK_ENTRY) {
+
+			table_base[table_idx] = INVALID_DESC;
+			xlat_arch_tlbi_va_regime(table_idx_va, ctx->xlat_regime);
+
+		} else if (action == ACTION_RECURSE_INTO_TABLE) {
+
+			subtable = (uint64_t *)(uintptr_t)(desc & TABLE_ADDR_MASK);
+
+			/* Recurse to write into subtable */
+			xlat_tables_unmap_region(ctx, mm, table_idx_va,
+						 subtable, XLAT_TABLE_ENTRIES,
+						 level + 1);
+
+			/*
+			 * If the subtable is now empty, remove its reference.
+			 */
+			if (xlat_table_is_empty(ctx, subtable)) {
+				table_base[table_idx] = INVALID_DESC;
+				xlat_arch_tlbi_va_regime(table_idx_va,
+						ctx->xlat_regime);
+			}
+
+		} else {
+			assert(action == ACTION_NONE);
+		}
+
+		table_idx++;
+		table_idx_va += XLAT_BLOCK_SIZE(level);
+
+		/* If reached the end of the region, exit */
+		if (region_end_va <= table_idx_va)
+			break;
+	}
+
+	if (level > ctx->base_level)
+		xlat_table_dec_regions_count(ctx, table_base);
+}
+
+#endif /* PLAT_XLAT_TABLES_DYNAMIC */
+
+/*
+ * From the given arguments, decides which action to take when mapping the
+ * specified region.
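
The index arithmetic driving this walk, isolated for a 4 KiB granule, where level L resolves 9 address bits starting at bit 12 + 9*(3-L). The constants follow the AArch64 4 KiB-granule layout rather than any TF-A header:

    #include <stdint.h>
    #include <stdio.h>

    static unsigned int addr_shift(unsigned int level) /* levels 0..3 */
    {
        return 12u + 9u * (3u - level);
    }

    int main(void)
    {
        uintptr_t va = 0x40201000u;

        for (unsigned int level = 1; level <= 3; level++) {
            uint64_t block = 1ULL << addr_shift(level);
            unsigned int idx = (va >> addr_shift(level)) & 0x1ff; /* 512 entries */
            printf("L%u: block 0x%llx, index %u\n",
                   level, (unsigned long long)block, idx);
        }
        return 0;
    }
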
+ */
+static action_t xlat_tables_map_region_action(const mmap_region_t *mm,
+		const int desc_type, const unsigned long long dest_pa,
+		const uintptr_t table_entry_base_va, const unsigned int level)
+{
+	uintptr_t mm_end_va = mm->base_va + mm->size - 1;
+	uintptr_t table_entry_end_va =
+			table_entry_base_va + XLAT_BLOCK_SIZE(level) - 1;
+
+	/*
+	 * The descriptor types allowed depend on the current table level.
+	 */
+
+	if ((mm->base_va <= table_entry_base_va) &&
+	    (mm_end_va >= table_entry_end_va)) {
+
+		/*
+		 * Table entry is covered by region
+		 * --------------------------------
+		 *
+		 * This means that this table entry can describe the whole
+		 * translation with this granularity in principle.
+		 */
+
+		if (level == 3) {
+			/*
+			 * Last level, only page descriptors are allowed.
+			 */
+			if (desc_type == PAGE_DESC) {
+				/*
+				 * There's another region mapped here, don't
+				 * overwrite.
+				 */
+				return ACTION_NONE;
+			} else {
+				assert(desc_type == INVALID_DESC);
+				return ACTION_WRITE_BLOCK_ENTRY;
+			}
+
+		} else {
+
+			/*
+			 * Other levels. Table descriptors are allowed. Block
+			 * descriptors too, but they have some limitations.
+			 */
+
+			if (desc_type == TABLE_DESC) {
+				/* There's already a table, recurse into it. */
+				return ACTION_RECURSE_INTO_TABLE;
+
+			} else if (desc_type == INVALID_DESC) {
+				/*
+				 * There's nothing mapped here, create a new
+				 * entry.
+				 *
+				 * Check if the destination granularity allows
+				 * us to use a block descriptor or we need a
+				 * finer table for it.
+				 *
+				 * Also, check if the current level allows block
+				 * descriptors. If not, create a table instead.
+				 */
+				if ((dest_pa & XLAT_BLOCK_MASK(level)) ||
+				    (level < MIN_LVL_BLOCK_DESC) ||
+				    (mm->granularity < XLAT_BLOCK_SIZE(level)))
+					return ACTION_CREATE_NEW_TABLE;
+				else
+					return ACTION_WRITE_BLOCK_ENTRY;
+
+			} else {
+				/*
+				 * There's another region mapped here, don't
+				 * overwrite.
+				 */
+				assert(desc_type == BLOCK_DESC);
+
+				return ACTION_NONE;
+			}
+		}
+
+	} else if ((mm->base_va <= table_entry_end_va) ||
+		   (mm_end_va >= table_entry_base_va)) {
+
+		/*
+		 * Region partially covers table entry
+		 * -----------------------------------
+		 *
+		 * This means that this table entry can't describe the whole
+		 * translation, a finer table is needed.
+		 *
+		 * There cannot be partial block overlaps in level 3. If that
+		 * happens, some of the preliminary checks when adding the
+		 * mmap region failed to detect that PA and VA must at least be
+		 * aligned to PAGE_SIZE.
+		 */
+		assert(level < 3);
+
+		if (desc_type == INVALID_DESC) {
+			/*
+			 * The block is not fully covered by the region. Create
+			 * a new table, recurse into it and try to map the
+			 * region with finer granularity.
+			 */
+			return ACTION_CREATE_NEW_TABLE;
+
+		} else {
+			assert(desc_type == TABLE_DESC);
+			/*
+			 * The block is not fully covered by the region, but
+			 * there is already a table here. Recurse into it and
+			 * try to map with finer granularity.
+			 *
+			 * PAGE_DESC for level 3 has the same value as
+			 * TABLE_DESC, but this code can't run on a level 3
+			 * table because there can't be overlaps in level 3.
+			 */
+			return ACTION_RECURSE_INTO_TABLE;
+		}
+	}
+
+	/*
+	 * This table entry is outside of the region specified in the arguments,
+	 * don't write anything to it.
+	 */
+	return ACTION_NONE;
+}
+
+/*
+ * Recursive function that writes to the translation tables and maps the
+ * specified region. On success, it returns the VA of the last byte that was
+ * successfully mapped. On error, it returns the VA of the next entry that
+ * should have been mapped.
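
The interval tests behind the three cases above, in standalone form. The full-coverage test matches the code exactly; the partial case is written here with the conventional interval-overlap condition for clarity:

    #include <stdint.h>
    #include <stdio.h>

    typedef enum { FULL, PARTIAL, OUTSIDE } coverage_t;

    static coverage_t coverage(uint64_t region_base, uint64_t region_end,
                               uint64_t entry_base, uint64_t entry_end)
    {
        if (region_base <= entry_base && region_end >= entry_end)
            return FULL;     /* a block/page entry is possible      */
        if (region_base <= entry_end && region_end >= entry_base)
            return PARTIAL;  /* must recurse into a finer table     */
        return OUTSIDE;      /* leave the entry alone               */
    }

    int main(void)
    {
        /* 2 MiB entry at 0x200000, region 0x200000..0x3fffff: fully covered. */
        printf("%d\n", coverage(0x200000, 0x3fffff, 0x200000, 0x3fffff) == FULL);
        /* Region 0x300000..0x4fffff only clips the entry: partial. */
        printf("%d\n", coverage(0x300000, 0x4fffff, 0x200000, 0x3fffff) == PARTIAL);
        return 0;
    }
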
+ */ +static uintptr_t xlat_tables_map_region(xlat_ctx_t *ctx, mmap_region_t *mm, + const uintptr_t table_base_va, + uint64_t *const table_base, + const int table_entries, + const unsigned int level) +{ + assert(level >= ctx->base_level && level <= XLAT_TABLE_LEVEL_MAX); + + uintptr_t mm_end_va = mm->base_va + mm->size - 1; + + uintptr_t table_idx_va; + unsigned long long table_idx_pa; + + uint64_t *subtable; + uint64_t desc; + + int table_idx; + + if (mm->base_va > table_base_va) { + /* Find the first index of the table affected by the region. */ + table_idx_va = mm->base_va & ~XLAT_BLOCK_MASK(level); + + table_idx = (table_idx_va - table_base_va) >> + XLAT_ADDR_SHIFT(level); + + assert(table_idx < table_entries); + } else { + /* Start from the beginning of the table. */ + table_idx_va = table_base_va; + table_idx = 0; + } + +#if PLAT_XLAT_TABLES_DYNAMIC + if (level > ctx->base_level) + xlat_table_inc_regions_count(ctx, table_base); +#endif + + while (table_idx < table_entries) { + + desc = table_base[table_idx]; + + table_idx_pa = mm->base_pa + table_idx_va - mm->base_va; + + action_t action = xlat_tables_map_region_action(mm, + desc & DESC_MASK, table_idx_pa, table_idx_va, level); + + if (action == ACTION_WRITE_BLOCK_ENTRY) { + + table_base[table_idx] = + xlat_desc(ctx, mm->attr, table_idx_pa, level); + + } else if (action == ACTION_CREATE_NEW_TABLE) { + + subtable = xlat_table_get_empty(ctx); + if (subtable == NULL) { + /* Not enough free tables to map this region */ + return table_idx_va; + } + + /* Point to new subtable from this one. */ + table_base[table_idx] = TABLE_DESC | (unsigned long)subtable; + + /* Recurse to write into subtable */ + uintptr_t end_va = xlat_tables_map_region(ctx, mm, table_idx_va, + subtable, XLAT_TABLE_ENTRIES, + level + 1); + if (end_va != table_idx_va + XLAT_BLOCK_SIZE(level) - 1) + return end_va; + + } else if (action == ACTION_RECURSE_INTO_TABLE) { + + subtable = (uint64_t *)(uintptr_t)(desc & TABLE_ADDR_MASK); + /* Recurse to write into subtable */ + uintptr_t end_va = xlat_tables_map_region(ctx, mm, table_idx_va, + subtable, XLAT_TABLE_ENTRIES, + level + 1); + if (end_va != table_idx_va + XLAT_BLOCK_SIZE(level) - 1) + return end_va; + + } else { + + assert(action == ACTION_NONE); + + } + + table_idx++; + table_idx_va += XLAT_BLOCK_SIZE(level); + + /* If reached the end of the region, exit */ + if (mm_end_va <= table_idx_va) + break; + } + + return table_idx_va - 1; +} + +void print_mmap(mmap_region_t *const mmap) +{ +#if LOG_LEVEL >= LOG_LEVEL_VERBOSE + tf_printf("mmap:\n"); + mmap_region_t *mm = mmap; + + while (mm->size) { + tf_printf(" VA:%p PA:0x%llx size:0x%zx attr:0x%x", + (void *)mm->base_va, mm->base_pa, + mm->size, mm->attr); + tf_printf(" granularity:0x%zx\n", mm->granularity); + ++mm; + }; + tf_printf("\n"); +#endif +} + +/* + * Function that verifies that a region can be mapped. + * Returns: + * 0: Success, the mapping is allowed. + * EINVAL: Invalid values were used as arguments. + * ERANGE: The memory limits were surpassed. + * ENOMEM: There is not enough memory in the mmap array. + * EPERM: Region overlaps another one in an invalid way. 
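
A plausible caller of the static-region API added in this patch; the addresses, sizes and region choices are invented, while MAP_REGION, mmap_add() and init_xlat_tables() are the interfaces shown in this diff:

    #include <xlat_tables_v2.h>

    /* The region list ends with a zero-sized sentinel, as mmap_add()
     * stops at mm->size == 0. Addresses and sizes here are invented. */
    static const mmap_region_t plat_mmap[] = {
        MAP_REGION(0x00000000, 0x00000000, 0x01000000,   /* 16 MiB of RAM  */
                   MT_MEMORY | MT_RW),
        MAP_REGION(0x1c090000, 0x1c090000, 0x00001000,   /* UART registers */
                   MT_DEVICE | MT_RW),
        {0},
    };

    void plat_configure_mmu(void)
    {
        mmap_add(plat_mmap);   /* queue the static regions              */
        init_xlat_tables();    /* build tables for the default context  */
    }
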
+ */ +static int mmap_add_region_check(xlat_ctx_t *ctx, const mmap_region_t *mm) +{ + unsigned long long base_pa = mm->base_pa; + uintptr_t base_va = mm->base_va; + size_t size = mm->size; + size_t granularity = mm->granularity; + + unsigned long long end_pa = base_pa + size - 1; + uintptr_t end_va = base_va + size - 1; + + if (!IS_PAGE_ALIGNED(base_pa) || !IS_PAGE_ALIGNED(base_va) || + !IS_PAGE_ALIGNED(size)) + return -EINVAL; + + if ((granularity != XLAT_BLOCK_SIZE(1)) && + (granularity != XLAT_BLOCK_SIZE(2)) && + (granularity != XLAT_BLOCK_SIZE(3))) { + return -EINVAL; + } + + /* Check for overflows */ + if ((base_pa > end_pa) || (base_va > end_va)) + return -ERANGE; + + if ((base_va + (uintptr_t)size - (uintptr_t)1) > ctx->va_max_address) + return -ERANGE; + + if ((base_pa + (unsigned long long)size - 1ULL) > ctx->pa_max_address) + return -ERANGE; + + /* Check that there is space in the ctx->mmap array */ + if (ctx->mmap[ctx->mmap_num - 1].size != 0) + return -ENOMEM; + + /* Check for PAs and VAs overlaps with all other regions */ + for (mmap_region_t *mm_cursor = ctx->mmap; + mm_cursor->size; ++mm_cursor) { + + uintptr_t mm_cursor_end_va = mm_cursor->base_va + + mm_cursor->size - 1; + + /* + * Check if one of the regions is completely inside the other + * one. + */ + int fully_overlapped_va = + ((base_va >= mm_cursor->base_va) && + (end_va <= mm_cursor_end_va)) || + + ((mm_cursor->base_va >= base_va) && + (mm_cursor_end_va <= end_va)); + + /* + * Full VA overlaps are only allowed if both regions are + * identity mapped (zero offset) or have the same VA to PA + * offset. Also, make sure that it's not the exact same area. + * This can only be done with static regions. + */ + if (fully_overlapped_va) { + +#if PLAT_XLAT_TABLES_DYNAMIC + if ((mm->attr & MT_DYNAMIC) || + (mm_cursor->attr & MT_DYNAMIC)) + return -EPERM; +#endif /* PLAT_XLAT_TABLES_DYNAMIC */ + if ((mm_cursor->base_va - mm_cursor->base_pa) != + (base_va - base_pa)) + return -EPERM; + + if ((base_va == mm_cursor->base_va) && + (size == mm_cursor->size)) + return -EPERM; + + } else { + /* + * If the regions do not have fully overlapping VAs, + * then they must have fully separated VAs and PAs. + * Partial overlaps are not allowed + */ + + unsigned long long mm_cursor_end_pa = + mm_cursor->base_pa + mm_cursor->size - 1; + + int separated_pa = + (end_pa < mm_cursor->base_pa) || + (base_pa > mm_cursor_end_pa); + int separated_va = + (end_va < mm_cursor->base_va) || + (base_va > mm_cursor_end_va); + + if (!(separated_va && separated_pa)) + return -EPERM; + } + } + + return 0; +} + +void mmap_add_region_ctx(xlat_ctx_t *ctx, const mmap_region_t *mm) +{ + mmap_region_t *mm_cursor = ctx->mmap; + mmap_region_t *mm_last = mm_cursor + ctx->mmap_num; + unsigned long long end_pa = mm->base_pa + mm->size - 1; + uintptr_t end_va = mm->base_va + mm->size - 1; + int ret; + + /* Ignore empty regions */ + if (!mm->size) + return; + + /* Static regions must be added before initializing the xlat tables. */ + assert(!ctx->initialized); + + ret = mmap_add_region_check(ctx, mm); + if (ret != 0) { + ERROR("mmap_add_region_check() failed. error %d\n", ret); + assert(0); + return; + } + + /* + * Find correct place in mmap to insert new region. + * + * 1 - Lower region VA end first. + * 2 - Smaller region size first. + * + * VA 0 0xFF + * + * 1st |------| + * 2nd |------------| + * 3rd |------| + * 4th |---| + * 5th |---| + * 6th |----------| + * 7th |-------------------------------------| + * + * This is required for overlapping regions only. 
It simplifies adding
+	 * regions with the loop in xlat_tables_init_internal because the outer
+	 * ones won't overwrite block or page descriptors of regions added
+	 * previously.
+	 *
+	 * Overlapping is only allowed for static regions.
+	 */
+
+	while ((mm_cursor->base_va + mm_cursor->size - 1) < end_va
+	       && mm_cursor->size)
+		++mm_cursor;
+
+	while ((mm_cursor->base_va + mm_cursor->size - 1 == end_va)
+	       && (mm_cursor->size < mm->size))
+		++mm_cursor;
+
+	/* Make room for new region by moving other regions up by one place */
+	memmove(mm_cursor + 1, mm_cursor,
+		(uintptr_t)mm_last - (uintptr_t)mm_cursor);
+
+	/*
+	 * Check we haven't lost the empty sentinel from the end of the array.
+	 * This shouldn't happen as we have checked in mmap_add_region_check
+	 * that there is free space.
+	 */
+	assert(mm_last->size == 0);
+
+	*mm_cursor = *mm;
+
+	if (end_pa > ctx->max_pa)
+		ctx->max_pa = end_pa;
+	if (end_va > ctx->max_va)
+		ctx->max_va = end_va;
+}
+
+void mmap_add_region(unsigned long long base_pa,
+		     uintptr_t base_va,
+		     size_t size,
+		     mmap_attr_t attr)
+{
+	mmap_region_t mm = MAP_REGION(base_pa, base_va, size, attr);
+	mmap_add_region_ctx(&tf_xlat_ctx, &mm);
+}
+
+
+void mmap_add_ctx(xlat_ctx_t *ctx, const mmap_region_t *mm)
+{
+	while (mm->size) {
+		mmap_add_region_ctx(ctx, mm);
+		mm++;
+	}
+}
+
+void mmap_add(const mmap_region_t *mm)
+{
+	mmap_add_ctx(&tf_xlat_ctx, mm);
+}
+
+#if PLAT_XLAT_TABLES_DYNAMIC
+
+int mmap_add_dynamic_region_ctx(xlat_ctx_t *ctx, mmap_region_t *mm)
+{
+	mmap_region_t *mm_cursor = ctx->mmap;
+	mmap_region_t *mm_last = mm_cursor + ctx->mmap_num;
+	unsigned long long end_pa = mm->base_pa + mm->size - 1;
+	uintptr_t end_va = mm->base_va + mm->size - 1;
+	int ret;
+
+	/* Nothing to do */
+	if (!mm->size)
+		return 0;
+
+	/* Now this region is a dynamic one */
+	mm->attr |= MT_DYNAMIC;
+
+	ret = mmap_add_region_check(ctx, mm);
+	if (ret != 0)
+		return ret;
+
+	/*
+	 * Find the adequate entry in the mmap array in the same way as for
+	 * static regions in mmap_add_region_ctx().
+	 */
+
+	while ((mm_cursor->base_va + mm_cursor->size - 1)
+	       < end_va && mm_cursor->size)
+		++mm_cursor;
+
+	while ((mm_cursor->base_va + mm_cursor->size - 1 == end_va)
+	       && (mm_cursor->size < mm->size))
+		++mm_cursor;
+
+	/* Make room for new region by moving other regions up by one place */
+	memmove(mm_cursor + 1, mm_cursor,
+		(uintptr_t)mm_last - (uintptr_t)mm_cursor);
+
+	/*
+	 * Check we haven't lost the empty sentinel from the end of the array.
+	 * This shouldn't happen as we have checked in mmap_add_region_check
+	 * that there is free space.
+	 */
+	assert(mm_last->size == 0);
+
+	*mm_cursor = *mm;
+
+	/*
+	 * Update the translation tables if the xlat tables are initialized. If
+	 * not, this region will be mapped when they are initialized.
+	 */
+	if (ctx->initialized) {
+		uintptr_t end_va = xlat_tables_map_region(ctx, mm_cursor,
+				0, ctx->base_table, ctx->base_table_entries,
+				ctx->base_level);
+
+		/* Failed to map, remove mmap entry, unmap and return error. */
+		if (end_va != mm_cursor->base_va + mm_cursor->size - 1) {
+			memmove(mm_cursor, mm_cursor + 1,
+				(uintptr_t)mm_last - (uintptr_t)mm_cursor);
+
+			/*
+			 * Check if the mapping function actually managed to map
+			 * anything. If not, just return now.
+			 */
+			if (mm_cursor->base_va >= end_va)
+				return -ENOMEM;
+
+			/*
+			 * Something went wrong after mapping some table
+			 * entries, undo every change done up to this point.
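
A sketch of how a caller might use the dynamic API above; the function name and values are invented, and the error handling mirrors the return codes documented in this file:

    #include <debug.h>
    #include <errno.h>
    #include <xlat_tables_v2.h>

    /* Map a buffer at runtime; a negative errno-style code is returned
     * on failure, 0 on success, matching the diff. */
    static int map_runtime_buffer(unsigned long long pa, uintptr_t va,
                                  size_t size)
    {
        int ret = mmap_add_dynamic_region(pa, va, size, MT_MEMORY | MT_RW);

        if (ret == -ENOMEM) {
            /* Either the mmap array or the subtable pool is exhausted. */
            WARN("out of xlat resources\n");
        }
        return ret;
    }
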
+ */ + mmap_region_t unmap_mm = { + .base_pa = 0, + .base_va = mm->base_va, + .size = end_va - mm->base_va, + .attr = 0 + }; + xlat_tables_unmap_region(ctx, &unmap_mm, 0, ctx->base_table, + ctx->base_table_entries, ctx->base_level); + + return -ENOMEM; + } + + /* + * Make sure that all entries are written to the memory. There + * is no need to invalidate entries when mapping dynamic regions + * because new table/block/page descriptors only replace old + * invalid descriptors, that aren't TLB cached. + */ + dsbishst(); + } + + if (end_pa > ctx->max_pa) + ctx->max_pa = end_pa; + if (end_va > ctx->max_va) + ctx->max_va = end_va; + + return 0; +} + +int mmap_add_dynamic_region(unsigned long long base_pa, + uintptr_t base_va, size_t size, mmap_attr_t attr) +{ + mmap_region_t mm = MAP_REGION(base_pa, base_va, size, attr); + return mmap_add_dynamic_region_ctx(&tf_xlat_ctx, &mm); +} + +/* + * Removes the region with given base Virtual Address and size from the given + * context. + * + * Returns: + * 0: Success. + * EINVAL: Invalid values were used as arguments (region not found). + * EPERM: Tried to remove a static region. + */ +int mmap_remove_dynamic_region_ctx(xlat_ctx_t *ctx, uintptr_t base_va, + size_t size) +{ + mmap_region_t *mm = ctx->mmap; + mmap_region_t *mm_last = mm + ctx->mmap_num; + int update_max_va_needed = 0; + int update_max_pa_needed = 0; + + /* Check sanity of mmap array. */ + assert(mm[ctx->mmap_num].size == 0); + + while (mm->size) { + if ((mm->base_va == base_va) && (mm->size == size)) + break; + ++mm; + } + + /* Check that the region was found */ + if (mm->size == 0) + return -EINVAL; + + /* If the region is static it can't be removed */ + if (!(mm->attr & MT_DYNAMIC)) + return -EPERM; + + /* Check if this region is using the top VAs or PAs. */ + if ((mm->base_va + mm->size - 1) == ctx->max_va) + update_max_va_needed = 1; + if ((mm->base_pa + mm->size - 1) == ctx->max_pa) + update_max_pa_needed = 1; + + /* Update the translation tables if needed */ + if (ctx->initialized) { + xlat_tables_unmap_region(ctx, mm, 0, ctx->base_table, + ctx->base_table_entries, + ctx->base_level); + xlat_arch_tlbi_va_sync(); + } + + /* Remove this region by moving the rest down by one place. */ + memmove(mm, mm + 1, (uintptr_t)mm_last - (uintptr_t)mm); + + /* Check if we need to update the max VAs and PAs */ + if (update_max_va_needed) { + ctx->max_va = 0; + mm = ctx->mmap; + while (mm->size) { + if ((mm->base_va + mm->size - 1) > ctx->max_va) + ctx->max_va = mm->base_va + mm->size - 1; + ++mm; + } + } + + if (update_max_pa_needed) { + ctx->max_pa = 0; + mm = ctx->mmap; + while (mm->size) { + if ((mm->base_pa + mm->size - 1) > ctx->max_pa) + ctx->max_pa = mm->base_pa + mm->size - 1; + ++mm; + } + } + + return 0; +} + +int mmap_remove_dynamic_region(uintptr_t base_va, size_t size) +{ + return mmap_remove_dynamic_region_ctx(&tf_xlat_ctx, + base_va, size); +} + +#endif /* PLAT_XLAT_TABLES_DYNAMIC */ + +#if LOG_LEVEL >= LOG_LEVEL_VERBOSE + +/* Print the attributes of the specified block descriptor. 
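
The matching teardown for the sketch above; mmap_remove_dynamic_region() must be given the exact base VA and size the region was added with, per the checks in this function:

    #include <assert.h>
    #include <xlat_tables_v2.h>

    static void unmap_runtime_buffer(uintptr_t va, size_t size)
    {
        int ret = mmap_remove_dynamic_region(va, size);

        /* -EINVAL: no region with this VA/size; -EPERM: region is static. */
        assert(ret == 0);
    }
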
*/ +static void xlat_desc_print(const xlat_ctx_t *ctx, uint64_t desc) +{ + int mem_type_index = ATTR_INDEX_GET(desc); + xlat_regime_t xlat_regime = ctx->xlat_regime; + + if (mem_type_index == ATTR_IWBWA_OWBWA_NTR_INDEX) { + tf_printf("MEM"); + } else if (mem_type_index == ATTR_NON_CACHEABLE_INDEX) { + tf_printf("NC"); + } else { + assert(mem_type_index == ATTR_DEVICE_INDEX); + tf_printf("DEV"); + } + + const char *priv_str = "(PRIV)"; + const char *user_str = "(USER)"; + + /* + * Showing Privileged vs Unprivileged only makes sense for EL1&0 + * mappings + */ + const char *ro_str = "-RO"; + const char *rw_str = "-RW"; + const char *no_access_str = "-NOACCESS"; + + if (xlat_regime == EL3_REGIME) { + /* For EL3, the AP[2] bit is all what matters */ + tf_printf((desc & LOWER_ATTRS(AP_RO)) ? ro_str : rw_str); + } else { + const char *ap_str = (desc & LOWER_ATTRS(AP_RO)) ? ro_str : rw_str; + tf_printf(ap_str); + tf_printf(priv_str); + /* + * EL0 can only have the same permissions as EL1 or no + * permissions at all. + */ + tf_printf((desc & LOWER_ATTRS(AP_ACCESS_UNPRIVILEGED)) + ? ap_str : no_access_str); + tf_printf(user_str); + } + + const char *xn_str = "-XN"; + const char *exec_str = "-EXEC"; + + if (xlat_regime == EL3_REGIME) { + /* For EL3, the XN bit is all what matters */ + tf_printf(LOWER_ATTRS(XN) & desc ? xn_str : exec_str); + } else { + /* For EL0 and EL1, we need to know who has which rights */ + tf_printf(LOWER_ATTRS(PXN) & desc ? xn_str : exec_str); + tf_printf(priv_str); + + tf_printf(LOWER_ATTRS(UXN) & desc ? xn_str : exec_str); + tf_printf(user_str); + } + + tf_printf(LOWER_ATTRS(NS) & desc ? "-NS" : "-S"); +} + +static const char * const level_spacers[] = { + "[LV0] ", + " [LV1] ", + " [LV2] ", + " [LV3] " +}; + +static const char *invalid_descriptors_ommited = + "%s(%d invalid descriptors omitted)\n"; + +/* + * Recursive function that reads the translation tables passed as an argument + * and prints their status. + */ +static void xlat_tables_print_internal(xlat_ctx_t *ctx, + const uintptr_t table_base_va, + uint64_t *const table_base, const int table_entries, + const unsigned int level) +{ + assert(level <= XLAT_TABLE_LEVEL_MAX); + + uint64_t desc; + uintptr_t table_idx_va = table_base_va; + int table_idx = 0; + + size_t level_size = XLAT_BLOCK_SIZE(level); + + /* + * Keep track of how many invalid descriptors are counted in a row. + * Whenever multiple invalid descriptors are found, only the first one + * is printed, and a line is added to inform about how many descriptors + * have been omitted. + */ + int invalid_row_count = 0; + + while (table_idx < table_entries) { + + desc = table_base[table_idx]; + + if ((desc & DESC_MASK) == INVALID_DESC) { + + if (invalid_row_count == 0) { + tf_printf("%sVA:%p size:0x%zx\n", + level_spacers[level], + (void *)table_idx_va, level_size); + } + invalid_row_count++; + + } else { + + if (invalid_row_count > 1) { + tf_printf(invalid_descriptors_ommited, + level_spacers[level], + invalid_row_count - 1); + } + invalid_row_count = 0; + + /* + * Check if this is a table or a block. Tables are only + * allowed in levels other than 3, but DESC_PAGE has the + * same value as DESC_TABLE, so we need to check. + */ + if (((desc & DESC_MASK) == TABLE_DESC) && + (level < XLAT_TABLE_LEVEL_MAX)) { + /* + * Do not print any PA for a table descriptor, + * as it doesn't directly map physical memory + * but instead points to the next translation + * table in the translation table walk. 
+ */ + tf_printf("%sVA:%p size:0x%zx\n", + level_spacers[level], + (void *)table_idx_va, level_size); + + uintptr_t addr_inner = desc & TABLE_ADDR_MASK; + + xlat_tables_print_internal(ctx, table_idx_va, + (uint64_t *)addr_inner, + XLAT_TABLE_ENTRIES, level + 1); + } else { + tf_printf("%sVA:%p PA:0x%llx size:0x%zx ", + level_spacers[level], + (void *)table_idx_va, + (unsigned long long)(desc & TABLE_ADDR_MASK), + level_size); + xlat_desc_print(ctx, desc); + tf_printf("\n"); + } + } + + table_idx++; + table_idx_va += level_size; + } + + if (invalid_row_count > 1) { + tf_printf(invalid_descriptors_ommited, + level_spacers[level], invalid_row_count - 1); + } +} + +#endif /* LOG_LEVEL >= LOG_LEVEL_VERBOSE */ + +void xlat_tables_print(xlat_ctx_t *ctx) +{ +#if LOG_LEVEL >= LOG_LEVEL_VERBOSE + const char *xlat_regime_str; + if (ctx->xlat_regime == EL1_EL0_REGIME) { + xlat_regime_str = "1&0"; + } else { + assert(ctx->xlat_regime == EL3_REGIME); + xlat_regime_str = "3"; + } + VERBOSE("Translation tables state:\n"); + VERBOSE(" Xlat regime: EL%s\n", xlat_regime_str); + VERBOSE(" Max allowed PA: 0x%llx\n", ctx->pa_max_address); + VERBOSE(" Max allowed VA: %p\n", (void *) ctx->va_max_address); + VERBOSE(" Max mapped PA: 0x%llx\n", ctx->max_pa); + VERBOSE(" Max mapped VA: %p\n", (void *) ctx->max_va); + + VERBOSE(" Initial lookup level: %i\n", ctx->base_level); + VERBOSE(" Entries @initial lookup level: %i\n", + ctx->base_table_entries); + + int used_page_tables; +#if PLAT_XLAT_TABLES_DYNAMIC + used_page_tables = 0; + for (unsigned int i = 0; i < ctx->tables_num; ++i) { + if (ctx->tables_mapped_regions[i] != 0) + ++used_page_tables; + } +#else + used_page_tables = ctx->next_table; +#endif + VERBOSE(" Used %i sub-tables out of %i (spare: %i)\n", + used_page_tables, ctx->tables_num, + ctx->tables_num - used_page_tables); + + xlat_tables_print_internal(ctx, 0, ctx->base_table, + ctx->base_table_entries, ctx->base_level); +#endif /* LOG_LEVEL >= LOG_LEVEL_VERBOSE */ +} + +void init_xlat_tables_ctx(xlat_ctx_t *ctx) +{ + assert(ctx != NULL); + assert(!ctx->initialized); + assert(ctx->xlat_regime == EL3_REGIME || ctx->xlat_regime == EL1_EL0_REGIME); + assert(!is_mmu_enabled_ctx(ctx)); + + mmap_region_t *mm = ctx->mmap; + + print_mmap(mm); + + /* All tables must be zeroed before mapping any region. */ + + for (unsigned int i = 0; i < ctx->base_table_entries; i++) + ctx->base_table[i] = INVALID_DESC; + + for (unsigned int j = 0; j < ctx->tables_num; j++) { +#if PLAT_XLAT_TABLES_DYNAMIC + ctx->tables_mapped_regions[j] = 0; +#endif + for (unsigned int i = 0; i < XLAT_TABLE_ENTRIES; i++) + ctx->tables[j][i] = INVALID_DESC; + } + + while (mm->size) { + uintptr_t end_va = xlat_tables_map_region(ctx, mm, 0, ctx->base_table, + ctx->base_table_entries, ctx->base_level); + + if (end_va != mm->base_va + mm->size - 1) { + ERROR("Not enough memory to map region:\n" + " VA:%p PA:0x%llx size:0x%zx attr:0x%x\n", + (void *)mm->base_va, mm->base_pa, mm->size, mm->attr); + panic(); + } + + mm++; + } + + assert(ctx->pa_max_address <= xlat_arch_get_max_supported_pa()); + assert(ctx->max_va <= ctx->va_max_address); + assert(ctx->max_pa <= ctx->pa_max_address); + + ctx->initialized = 1; + + xlat_tables_print(ctx); +} + +void init_xlat_tables(void) +{ + init_xlat_tables_ctx(&tf_xlat_ctx); +} + +/* + * If dynamic allocation of new regions is disabled then by the time we call the + * function enabling the MMU, we'll have registered all the memory regions to + * map for the system's lifetime. 
Therefore, at this point we know the maximum + * physical address that will ever be mapped. + * + * If dynamic allocation is enabled then we can't make any such assumption + * because the maximum physical address could get pushed while adding a new + * region. Therefore, in this case we have to assume that the whole address + * space size might be mapped. + */ +#ifdef PLAT_XLAT_TABLES_DYNAMIC +#define MAX_PHYS_ADDR tf_xlat_ctx.pa_max_address +#else +#define MAX_PHYS_ADDR tf_xlat_ctx.max_pa +#endif + +#ifdef AARCH32 + +void enable_mmu_secure(unsigned int flags) +{ + enable_mmu_arch(flags, tf_xlat_ctx.base_table, MAX_PHYS_ADDR, + tf_xlat_ctx.va_max_address); +} + +#else + +void enable_mmu_el1(unsigned int flags) +{ + enable_mmu_arch(flags, tf_xlat_ctx.base_table, MAX_PHYS_ADDR, + tf_xlat_ctx.va_max_address); +} + +void enable_mmu_el3(unsigned int flags) +{ + enable_mmu_arch(flags, tf_xlat_ctx.base_table, MAX_PHYS_ADDR, + tf_xlat_ctx.va_max_address); +} + +#endif /* AARCH32 */ + +/* + * Do a translation table walk to find the block or page descriptor that maps + * virtual_addr. + * + * On success, return the address of the descriptor within the translation + * table. Its lookup level is stored in '*out_level'. + * On error, return NULL. + * + * xlat_table_base + * Base address for the initial lookup level. + * xlat_table_base_entries + * Number of entries in the translation table for the initial lookup level. + * virt_addr_space_size + * Size in bytes of the virtual address space. + */ +static uint64_t *find_xlat_table_entry(uintptr_t virtual_addr, + void *xlat_table_base, + int xlat_table_base_entries, + unsigned long long virt_addr_space_size, + int *out_level) +{ + unsigned int start_level; + uint64_t *table; + int entries; + + VERBOSE("%s(%p)\n", __func__, (void *)virtual_addr); + + start_level = GET_XLAT_TABLE_LEVEL_BASE(virt_addr_space_size); + VERBOSE("Starting translation table walk from level %i\n", start_level); + + table = xlat_table_base; + entries = xlat_table_base_entries; + + for (unsigned int level = start_level; + level <= XLAT_TABLE_LEVEL_MAX; + ++level) { + int idx; + uint64_t desc; + uint64_t desc_type; + + VERBOSE("Table address: %p\n", (void *)table); + + idx = XLAT_TABLE_IDX(virtual_addr, level); + VERBOSE("Index into level %i table: %i\n", level, idx); + if (idx >= entries) { + VERBOSE("Invalid address\n"); + return NULL; + } + + desc = table[idx]; + desc_type = desc & DESC_MASK; + VERBOSE("Descriptor at level %i: 0x%llx\n", level, + (unsigned long long)desc); + + if (desc_type == INVALID_DESC) { + VERBOSE("Invalid entry (memory not mapped)\n"); + return NULL; + } + + if (level == XLAT_TABLE_LEVEL_MAX) { + /* + * There can't be table entries at the final lookup + * level. + */ + assert(desc_type == PAGE_DESC); + VERBOSE("Descriptor mapping a memory page (size: 0x%llx)\n", + (unsigned long long)XLAT_BLOCK_SIZE(XLAT_TABLE_LEVEL_MAX)); + *out_level = level; + return &table[idx]; + } + + if (desc_type == BLOCK_DESC) { + VERBOSE("Descriptor mapping a memory block (size: 0x%llx)\n", + (unsigned long long)XLAT_BLOCK_SIZE(level)); + *out_level = level; + return &table[idx]; + } + + assert(desc_type == TABLE_DESC); + VERBOSE("Table descriptor, continuing xlat table walk...\n"); + table = (uint64_t *)(uintptr_t)(desc & TABLE_ADDR_MASK); + entries = XLAT_TABLE_ENTRIES; + } + + /* + * This shouldn't be reached, the translation table walk should end at + * most at level XLAT_TABLE_LEVEL_MAX and return from inside the loop. 
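
A compilable miniature of this walk: two levels, 16 entries each, and an invented 2-bit descriptor encoding in place of the VMSA one:

    #include <stdint.h>
    #include <stdio.h>

    #define D_INVALID 0u
    #define D_TABLE   1u
    #define D_LEAF    2u

    static uint64_t l1[16], l0[16];

    static uint64_t *walk(uintptr_t va)
    {
        uint64_t *table = l0;

        for (int level = 0; level <= 1; level++) {
            unsigned int shift = 8 + 4 * (1 - level); /* 16 entries per level */
            uint64_t *slot = &table[(va >> shift) & 0xf];

            switch (*slot & 3) {
            case D_LEAF:
                return slot;                          /* block/page found   */
            case D_TABLE:
                table = (uint64_t *)(uintptr_t)(*slot & ~(uint64_t)3);
                break;                                /* descend one level  */
            default:
                return NULL;                          /* not mapped         */
            }
        }
        return NULL;
    }

    int main(void)
    {
        l0[1] = (uint64_t)(uintptr_t)l1 | D_TABLE; /* VA 0x1000-0x1fff via l1 */
        l1[2] = 0xABC00 | D_LEAF;                  /* leaf covering VA 0x1200 */

        uint64_t *e = walk(0x1234);
        printf("desc: 0x%llx\n", e ? (unsigned long long)*e : 0ull);
        printf("unmapped: %p\n", (void *)walk(0x2000));
        return 0;
    }
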
+	 */
+	assert(0);
+
+	return NULL;
+}
+
+
+static int get_mem_attributes_internal(const xlat_ctx_t *ctx, uintptr_t base_va,
+		mmap_attr_t *attributes, uint64_t **table_entry,
+		unsigned long long *addr_pa, int *table_level)
+{
+	uint64_t *entry;
+	uint64_t desc;
+	int level;
+	unsigned long long virt_addr_space_size;
+
+	/*
+	 * Sanity-check arguments.
+	 */
+	assert(ctx != NULL);
+	assert(ctx->initialized);
+	assert(ctx->xlat_regime == EL1_EL0_REGIME || ctx->xlat_regime == EL3_REGIME);
+
+	virt_addr_space_size = (unsigned long long)ctx->va_max_address + 1;
+	assert(virt_addr_space_size > 0);
+
+	entry = find_xlat_table_entry(base_va,
+				      ctx->base_table,
+				      ctx->base_table_entries,
+				      virt_addr_space_size,
+				      &level);
+	if (entry == NULL) {
+		WARN("Address %p is not mapped.\n", (void *)base_va);
+		return -EINVAL;
+	}
+
+	if (addr_pa != NULL) {
+		*addr_pa = *entry & TABLE_ADDR_MASK;
+	}
+
+	if (table_entry != NULL) {
+		*table_entry = entry;
+	}
+
+	if (table_level != NULL) {
+		*table_level = level;
+	}
+
+	desc = *entry;
+
+#if LOG_LEVEL >= LOG_LEVEL_VERBOSE
+	VERBOSE("Attributes: ");
+	xlat_desc_print(ctx, desc);
+	tf_printf("\n");
+#endif /* LOG_LEVEL >= LOG_LEVEL_VERBOSE */
+
+	assert(attributes != NULL);
+	*attributes = 0;
+
+	int attr_index = (desc >> ATTR_INDEX_SHIFT) & ATTR_INDEX_MASK;
+
+	if (attr_index == ATTR_IWBWA_OWBWA_NTR_INDEX) {
+		*attributes |= MT_MEMORY;
+	} else if (attr_index == ATTR_NON_CACHEABLE_INDEX) {
+		*attributes |= MT_NON_CACHEABLE;
+	} else {
+		assert(attr_index == ATTR_DEVICE_INDEX);
+		*attributes |= MT_DEVICE;
+	}
+
+	int ap2_bit = (desc >> AP2_SHIFT) & 1;
+
+	if (ap2_bit == AP2_RW)
+		*attributes |= MT_RW;
+
+	if (ctx->xlat_regime == EL1_EL0_REGIME) {
+		int ap1_bit = (desc >> AP1_SHIFT) & 1;
+		if (ap1_bit == AP1_ACCESS_UNPRIVILEGED)
+			*attributes |= MT_USER;
+	}
+
+	int ns_bit = (desc >> NS_SHIFT) & 1;
+
+	if (ns_bit == 1)
+		*attributes |= MT_NS;
+
+	uint64_t xn_mask = xlat_arch_regime_get_xn_desc(ctx->xlat_regime);
+
+	if ((desc & xn_mask) == xn_mask) {
+		*attributes |= MT_EXECUTE_NEVER;
+	} else {
+		assert((desc & xn_mask) == 0);
+	}
+
+	return 0;
+}
+
+
+int get_mem_attributes(const xlat_ctx_t *ctx, uintptr_t base_va,
+		       mmap_attr_t *attributes)
+{
+	return get_mem_attributes_internal(ctx, base_va, attributes,
+					   NULL, NULL, NULL);
+}
+
+
+int change_mem_attributes(xlat_ctx_t *ctx,
+			  uintptr_t base_va,
+			  size_t size,
+			  mmap_attr_t attr)
+{
+	/* Note: This implementation isn't optimized. */
+
+	assert(ctx != NULL);
+	assert(ctx->initialized);
+
+	unsigned long long virt_addr_space_size =
+		(unsigned long long)ctx->va_max_address + 1;
+	assert(virt_addr_space_size > 0);
+
+	if (!IS_PAGE_ALIGNED(base_va)) {
+		WARN("%s: Address %p is not aligned on a page boundary.\n",
+		     __func__, (void *)base_va);
+		return -EINVAL;
+	}
+
+	if (size == 0) {
+		WARN("%s: Size is 0.\n", __func__);
+		return -EINVAL;
+	}
+
+	if ((size % PAGE_SIZE) != 0) {
+		WARN("%s: Size 0x%zx is not a multiple of the page size.\n",
+		     __func__, size);
+		return -EINVAL;
+	}
+
+	if (((attr & MT_EXECUTE_NEVER) == 0) && ((attr & MT_RW) != 0)) {
+		WARN("%s() does not allow remapping memory as read-write and executable.\n",
+		     __func__);
+		return -EINVAL;
+	}
+
+	int pages_count = size / PAGE_SIZE;
+
+	VERBOSE("Changing memory attributes of %i pages starting from address %p...\n",
+		pages_count, (void *)base_va);
+
+	uintptr_t base_va_original = base_va;
+
+	/*
+	 * Sanity checks.
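
One plausible use of change_mem_attributes(): write-protecting a page after initialisation. Leaving MT_RW out of the attributes requests read-only, per xlat_desc() above; the function name here is invented:

    #include <xlat_tables_v2.h>

    static int lock_page(xlat_ctx_t *ctx, uintptr_t page_va)
    {
        /* RO + XN; the RW + executable combination is rejected above. */
        return change_mem_attributes(ctx, page_va, PAGE_SIZE,
                                     MT_EXECUTE_NEVER);
    }
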
+	 */
+	for (int i = 0; i < pages_count; ++i) {
+		uint64_t *entry;
+		uint64_t desc;
+		int level;
+
+		entry = find_xlat_table_entry(base_va,
+					      ctx->base_table,
+					      ctx->base_table_entries,
+					      virt_addr_space_size,
+					      &level);
+		if (entry == NULL) {
+			WARN("Address %p is not mapped.\n", (void *)base_va);
+			return -EINVAL;
+		}
+
+		desc = *entry;
+
+		/*
+		 * Check that all the required pages are mapped at page
+		 * granularity.
+		 */
+		if (((desc & DESC_MASK) != PAGE_DESC) ||
+			(level != XLAT_TABLE_LEVEL_MAX)) {
+			WARN("Address %p is not mapped at the right granularity.\n",
+			     (void *)base_va);
+			WARN("Granularity is 0x%llx, should be 0x%x.\n",
+			     (unsigned long long)XLAT_BLOCK_SIZE(level), PAGE_SIZE);
+			return -EINVAL;
+		}
+
+		/*
+		 * If the region type is device, it shouldn't be executable.
+		 */
+		int attr_index = (desc >> ATTR_INDEX_SHIFT) & ATTR_INDEX_MASK;
+		if (attr_index == ATTR_DEVICE_INDEX) {
+			if ((attr & MT_EXECUTE_NEVER) == 0) {
+				WARN("Setting device memory as executable at address %p.\n",
+				     (void *)base_va);
+				return -EINVAL;
+			}
+		}
+
+		base_va += PAGE_SIZE;
+	}
+
+	/* Restore original value. */
+	base_va = base_va_original;
+
+	VERBOSE("%s: All pages are mapped, now changing their attributes...\n",
+		__func__);
+
+	for (int i = 0; i < pages_count; ++i) {
+
+		mmap_attr_t old_attr, new_attr;
+		uint64_t *entry;
+		int level;
+		unsigned long long addr_pa;
+
+		get_mem_attributes_internal(ctx, base_va, &old_attr,
+					    &entry, &addr_pa, &level);
+
+		VERBOSE("Old attributes: 0x%x\n", old_attr);
+
+		/*
+		 * From attr, only MT_RO/MT_RW, MT_EXECUTE/MT_EXECUTE_NEVER and
+		 * MT_USER/MT_PRIVILEGED are taken into account. Any other
+		 * information is ignored.
+		 */
+
+		/* Clean the old attributes so that they can be rebuilt. */
+		new_attr = old_attr & ~(MT_RW|MT_EXECUTE_NEVER|MT_USER);
+
+		/*
+		 * Update attributes, but filter out the ones this function
+		 * isn't allowed to change.
+		 */
+		new_attr |= attr & (MT_RW|MT_EXECUTE_NEVER|MT_USER);
+
+		VERBOSE("New attributes: 0x%x\n", new_attr);
+
+		/*
+		 * The break-before-make sequence requires writing an invalid
+		 * descriptor and making sure that the system sees the change
+		 * before writing the new descriptor.
+		 */
+		*entry = INVALID_DESC;
+
+		/* Invalidate any cached copy of this mapping in the TLBs. */
+		xlat_arch_tlbi_va_regime(base_va, ctx->xlat_regime);
+
+		/* Ensure completion of the invalidation. */
+		xlat_arch_tlbi_va_sync();
+
+		/* Write new descriptor */
+		*entry = xlat_desc(ctx, new_attr, addr_pa, level);
+
+		base_va += PAGE_SIZE;
+	}
+
+	/* Ensure that the last descriptor written is seen by the system. */
+	dsbish();
+
+	return 0;
+}
diff --git a/lib/xlat_tables_v2/xlat_tables_private.h b/lib/xlat_tables_v2/xlat_tables_private.h
new file mode 100644
index 00000000..79efbebb
--- /dev/null
+++ b/lib/xlat_tables_v2/xlat_tables_private.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef __XLAT_TABLES_PRIVATE_H__
+#define __XLAT_TABLES_PRIVATE_H__
+
+#include <platform_def.h>
+#include <xlat_tables_defs.h>
+
+#if PLAT_XLAT_TABLES_DYNAMIC
+/*
+ * Shifts and masks to access fields of an mmap_attr_t
+ */
+/* Dynamic or static */
+#define MT_DYN_SHIFT	30 /* 31 would cause undefined behaviours */
+
+/*
+ * Memory mapping private attributes
+ *
+ * Private attributes not exposed in the mmap_attr_t enum.
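
The "undefined behaviours" remark above is about signed shifts: for a 32-bit int, shifting 1 into bit 31 overflows the signed range, which C leaves undefined. A self-contained illustration:

    #include <stdio.h>

    #define SAFE_DYN  (1 << 30)   /* fine: fits in a positive int */
    #define ALSO_SAFE (1u << 31)  /* fine: unsigned shift         */
    /* (1 << 31) would be UB because enum constants are ints. */

    int main(void)
    {
        printf("0x%x 0x%x\n", (unsigned)SAFE_DYN, ALSO_SAFE);
        return 0;
    }
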
+ */
+typedef enum {
+	/*
+	 * Regions mapped before the MMU can't be unmapped dynamically (they are
+	 * static) and regions mapped with MMU enabled can be unmapped. This
+	 * behaviour can't be overridden.
+	 *
+	 * Static regions can overlap each other, dynamic regions can't.
+	 */
+	MT_STATIC	= 0 << MT_DYN_SHIFT,
+	MT_DYNAMIC	= 1 << MT_DYN_SHIFT
+} mmap_priv_attr_t;
+
+#endif /* PLAT_XLAT_TABLES_DYNAMIC */
+
+/*
+ * Invalidate all TLB entries that match the given virtual address. This
+ * operation applies to all PEs in the same Inner Shareable domain as the PE
+ * that executes this function. This function must be called for every
+ * translation table entry that is modified.
+ *
+ * xlat_arch_tlbi_va() applies the invalidation to the exception level of the
+ * current translation regime, whereas xlat_arch_tlbi_va_regime() applies it to
+ * the given translation regime.
+ *
+ * Note, however, that it is architecturally UNDEFINED to invalidate TLB entries
+ * pertaining to a higher exception level, e.g. invalidating EL3 entries from
+ * S-EL1.
+ */
+void xlat_arch_tlbi_va(uintptr_t va);
+void xlat_arch_tlbi_va_regime(uintptr_t va, xlat_regime_t xlat_regime);
+
+/*
+ * This function has to be called at the end of any code that uses the function
+ * xlat_arch_tlbi_va().
+ */
+void xlat_arch_tlbi_va_sync(void);
+
+/* Print VA, PA, size and attributes of all regions in the mmap array. */
+void print_mmap(mmap_region_t *const mmap);
+
+/*
+ * Print the current state of the translation tables by reading them from
+ * memory.
+ */
+void xlat_tables_print(xlat_ctx_t *ctx);
+
+/*
+ * Architecture-specific initialization code.
+ */
+
+/* Returns the current Exception Level. The returned EL must be 1 or higher. */
+int xlat_arch_current_el(void);
+
+/*
+ * Return the maximum physical address supported by the hardware.
+ * This value depends on the execution state (AArch32/AArch64).
+ */
+unsigned long long xlat_arch_get_max_supported_pa(void);
+
+/* Enable MMU and configure it to use the specified translation tables. */
+void enable_mmu_arch(unsigned int flags, uint64_t *base_table,
+		unsigned long long pa, uintptr_t max_va);
+
+/*
+ * Return 1 if the MMU of the translation regime managed by the given xlat_ctx_t
+ * is enabled, 0 otherwise.
+ */
+int is_mmu_enabled_ctx(const xlat_ctx_t *ctx);
+
+#endif /* __XLAT_TABLES_PRIVATE_H__ */
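
Finally, a sketch of the break-before-make discipline that xlat_arch_tlbi_va() and xlat_arch_tlbi_va_sync() exist to support. The stubs stand in for the real helpers so the sketch compiles anywhere; on hardware they issue the tlbi/dsb/isb sequences:

    #include <stdint.h>

    static void stub_tlbi_va(uintptr_t va) { (void)va; } /* xlat_arch_tlbi_va() stand-in      */
    static void stub_tlbi_sync(void)       { }           /* xlat_arch_tlbi_va_sync() stand-in */

    static void replace_entry(volatile uint64_t *entry, uint64_t new_desc,
                              uintptr_t va)
    {
        *entry = 0;          /* 1. break: write an invalid descriptor  */
        stub_tlbi_va(va);    /* 2. invalidate any cached old mapping   */
        stub_tlbi_sync();    /* 3. wait for the invalidation to finish */
        *entry = new_desc;   /* 4. make: install the new descriptor    */
    }

    int main(void)
    {
        uint64_t entry = 0x1003;
        replace_entry(&entry, 0x2003, 0x1000);
        return (int)(entry != 0x2003);
    }
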