author     patacongo <patacongo@42af7a65-404d-4744-a932-0658087f49c3>  2012-10-21 00:41:44 +0000
committer  patacongo <patacongo@42af7a65-404d-4744-a932-0658087f49c3>  2012-10-21 00:41:44 +0000
commit     b007f9897061c921e685d3970035228f21a2e506 (patch)
tree       35873755480bfc436875406748f17798082c333e /nuttx/lib
parent     de7e070e684796f52cf5b4466e7b16755a3a92c8 (diff)
download   px4-nuttx-b007f9897061c921e685d3970035228f21a2e506.tar.gz
           px4-nuttx-b007f9897061c921e685d3970035228f21a2e506.tar.bz2
           px4-nuttx-b007f9897061c921e685d3970035228f21a2e506.zip
Add a version of memset() optimized for speed
git-svn-id: svn://svn.code.sf.net/p/nuttx/code/trunk@5242 42af7a65-404d-4744-a932-0658087f49c3
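The speed-optimized path added below works by replicating the fill byte into 16- and 32-bit values, setting leading bytes one at a time until the destination is word aligned, and then storing whole 32-bit words. The following standalone sketch illustrates the same idea outside the diff; it is not the committed code, and the helper name fill32_sketch is purely illustrative:

#include <stdint.h>
#include <stddef.h>

/* Illustrative sketch of the byte-replication technique: widen the fill
 * byte to 16 and then 32 bits, align the destination, and store 32-bit
 * words in the inner loop.  Head and tail bytes are set one at a time.
 */

void fill32_sketch(void *dest, int c, size_t n)
{
  uintptr_t addr  = (uintptr_t)dest;
  uint8_t   val8  = (uint8_t)c;   /* memset() stores (unsigned char)c */
  uint16_t  val16 = (uint16_t)(((uint16_t)val8 << 8) | val8);
  uint32_t  val32 = ((uint32_t)val16 << 16) | val16;

  /* Set leading bytes until the address is 32-bit aligned */

  while (n > 0 && (addr & 3) != 0)
    {
      *(uint8_t *)addr = val8;
      addr++;
      n--;
    }

  /* Store whole 32-bit words */

  while (n >= 4)
    {
      *(uint32_t *)addr = val32;
      addr += 4;
      n -= 4;
    }

  /* Set any trailing bytes */

  while (n > 0)
    {
      *(uint8_t *)addr = val8;
      addr++;
      n--;
    }
}

The committed version differs from this sketch in that it first aligns to a 16-bit boundary, uses a 16-bit store where possible, and finishes with a small switch on the remaining 0-3 bytes.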
Diffstat (limited to 'nuttx/lib')
-rw-r--r--  nuttx/lib/Kconfig              37
-rw-r--r--  nuttx/lib/string/lib_memset.c  84
2 files changed, 107 insertions(+), 14 deletions(-)
diff --git a/nuttx/lib/Kconfig b/nuttx/lib/Kconfig
index b3f743db2..0f25c8923 100644
--- a/nuttx/lib/Kconfig
+++ b/nuttx/lib/Kconfig
@@ -153,19 +153,20 @@ config ARCH_OPTIMIZED_FUNCTIONS
if ARCH_OPTIMIZED_FUNCTIONS
config ARCH_MEMCPY
- bool "memcpy"
+ bool "memcpy()"
default n
---help---
Select this option if the architecture provides an optimized version
of memcpy().
config MEMCPY_VIK
- bool "Vik memcpy"
+ bool "Vik memcpy()"
default n
depends on !ARCH_MEMCPY
---help---
- Select this option to use the optimized memcpy() function by Daniel Vik.
- See licensing information in the top-level COPYING file.
+ Select this option to use the optimized memcpy() function by Daniel Vik.
+ Select this option to optimize for speed at the expense of increased size.
+ See licensing information in the top-level COPYING file.
if MEMCPY_VIK
config MEMCPY_PRE_INC_PTRS
@@ -182,50 +183,58 @@ config MEMCPY_INDEXED_COPY
MEMCPY_PRE_INC_PTRS option.
config MEMCPY_64BIT
- bool "64-bit memcpy"
+ bool "64-bit memcpy()"
default n
---help---
- Compiles memcpy for 64 bit architectures
+ Compiles memcpy() for 64-bit architectures
endif
config ARCH_MEMCMP
- bool "memcmp"
+ bool "memcmp()"
default n
---help---
Select this option if the architecture provides an optimized version
of memcmp().
config ARCH_MEMMOVE
- bool "memmove"
+ bool "memmove()"
default n
---help---
Select this option if the architecture provides an optimized version
of memmove().
config ARCH_MEMSET
- bool "memset"
+ bool "memset()"
default n
---help---
Select this option if the architecture provides an optimized version
of memset().
+config MEMSET_OPTSPEED
+ bool "Optimize memset() for speed"
+ default n
+ depends on !ARCH_MEMSET
+ ---help---
+ Select this option to use a version of memset() optimized for speed.
+ Default: memset() is optimized for size.
+
config ARCH_STRCMP
- bool "strcmp"
+ bool "strcmp()"
default n
---help---
Select this option if the architecture provides an optimized version
of strcmp().
config ARCH_STRCPY
- bool "strcpy"
+ bool "strcpy()"
default n
---help---
Select this option if the architecture provides an optimized version
of strcpy().
config ARCH_STRNCPY
- bool "strncpy"
+ bool "strncpy()"
default n
---help---
Select this option if the architecture provides an optimized version
@@ -239,14 +248,14 @@ config ARCH_STRLEN
of strlen().
config ARCH_STRNLEN
- bool "strlen"
+ bool "strnlen()"
default n
---help---
Select this option if the architecture provides an optimized version
of strnlen().
config ARCH_BZERO
- bool "bzero"
+ bool "bzero()"
default n
---help---
Select this option if the architecture provides an optimized version
diff --git a/nuttx/lib/string/lib_memset.c b/nuttx/lib/string/lib_memset.c
index 916351b97..c910d2ce0 100644
--- a/nuttx/lib/string/lib_memset.c
+++ b/nuttx/lib/string/lib_memset.c
@@ -42,8 +42,12 @@
************************************************************/
#include <nuttx/config.h>
+
#include <sys/types.h>
+
+#include <stdint.h>
#include <string.h>
+#include <assert.h>
/************************************************************
* Global Functions
@@ -52,8 +56,88 @@
#ifndef CONFIG_ARCH_MEMSET
void *memset(void *s, int c, size_t n)
{
+#ifdef CONFIG_MEMSET_OPTSPEED
+ /* This version is optimized for speed (you could do better
+ * still by exploiting processor caching or memory burst
+ * knowledge).  64-bit support might improve performance as
+ * well.
+ */
+
+ uintptr_t addr = (uintptr_t)s;
+ uint16_t val16 = ((uint16_t)(uint8_t)c << 8) | (uint8_t)c;
+ uint32_t val32 = ((uint32_t)val16 << 16) | (uint32_t)val16;
+
+ /* Make sure that there is something to be set */
+
+ if (n > 0)
+ {
+ /* Align to a 16-bit boundary */
+
+ if ((addr & 1) != 0)
+ {
+ *(uint8_t*)addr = (uint8_t)c;
+ addr += 1;
+ n -= 1;
+ }
+
+ /* Check if there are at least 16 bits left to be set */
+
+ if (n >= 2)
+ {
+ /* Align to a 32-bit boundary (we know that the destination
+ * address is already aligned to at least a 16-bit boundary).
+ */
+
+ if ((addr & 3) != 0)
+ {
+ *(uint16_t*)addr = val16;
+ addr += 2;
+ n -= 2;
+ }
+
+ /* Loop while there are at least 32 bits left to be set */
+
+ while (n >= 4)
+ {
+ *(uint32_t*)addr = val32;
+ addr += 4;
+ n -= 4;
+ }
+ }
+
+ /* We may get here under the following conditions:
+ *
+ * n = 0, addr may or may not be aligned
+ * n = 1, addr may or may not be aligned
+ * n = 2, addr is aligned to a 32-bit boundary
+ * n = 3, addr is aligned to a 32-bit boundary
+ */
+
+ switch (n)
+ {
+ default:
+ case 0:
+ DEBUGASSERT(n == 0);
+ break;
+
+ case 2:
+ *(uint16_t*)addr = val16;
+ break;
+
+ case 3:
+ *(uint16_t*)addr = val16;
+ addr += 2;
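+ /* Fall through to store the final byte */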
+ case 1:
+ *(uint8_t*)addr = (uint8_t)c;
+ break;
+ }
+ }
+#else
+ /* This version is optimized for size */
+
unsigned char *p = (unsigned char*)s;
while (n-- > 0) *p++ = c;
+#endif
return s;
}
#endif
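As a quick host-side sanity check (illustrative only, not part of this commit), the sketch below fills a deliberately misaligned, odd-length window so that both the unaligned head/tail handling and the 32-bit word loop are exercised. On a NuttX build the fast path is compiled in only when CONFIG_MEMSET_OPTSPEED=y and CONFIG_ARCH_MEMSET is not selected.

#include <string.h>
#include <stdio.h>
#include <stdlib.h>

/* Fill a misaligned window of the buffer (odd start address, odd length)
 * and verify every byte, so the byte head/tail paths and the word loop
 * are all taken.
 */

int main(void)
{
  unsigned char buf[64];
  size_t i;

  memset(buf, 0x00, sizeof(buf));
  memset(buf + 1, 0xa5, 37);

  for (i = 0; i < sizeof(buf); i++)
    {
      unsigned char expect = (i >= 1 && i < 1 + 37) ? 0xa5 : 0x00;
      if (buf[i] != expect)
        {
          printf("Mismatch at %zu: %02x != %02x\n",
                 i, (unsigned)buf[i], (unsigned)expect);
          return EXIT_FAILURE;
        }
    }

  printf("memset() check passed\n");
  return EXIT_SUCCESS;
}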