summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andy Hung <hunga@google.com> 2024-02-01 18:25:45 -0800
committer Andy Hung <hunga@google.com> 2024-03-12 14:07:24 -0700
commit 4a0897fd6085f6e2e1618d4c466982a4af4b445e (patch)
tree 87be6b5066ad1b53cbd2aff26d5d52c03d6f1ce3
parent f33da679600164b3f8872dc901fdea576eeed63b (diff)
download media-4a0897fd6085f6e2e1618d4c466982a4af4b445e.tar.gz
power: Use intrinsic optimizations
Use general intrinsic_utils library for more portable optimization.

Test: atest audio_power_benchmark
Test: atest power_tests
Bug: 323610467
Merged-In: I9a7df0257fc83ba7bf22316247170d1bd95c2700
Change-Id: I9a7df0257fc83ba7bf22316247170d1bd95c2700
-rw-r--r-- audio_utils/benchmarks/audio_power_benchmark.cpp 371
-rw-r--r-- audio_utils/include/audio_utils/intrinsic_utils.h 88
-rw-r--r-- audio_utils/power.cpp 76
3 files changed, 300 insertions, 235 deletions
diff --git a/audio_utils/benchmarks/audio_power_benchmark.cpp b/audio_utils/benchmarks/audio_power_benchmark.cpp
index 130aec09..b355d976 100644
--- a/audio_utils/benchmarks/audio_power_benchmark.cpp
+++ b/audio_utils/benchmarks/audio_power_benchmark.cpp
@@ -28,194 +28,193 @@ Pixel 7 (USE_NEON code) 1024 frames
------------------------------------------------------------------------------------
Benchmark Time CPU Iteration
------------------------------------------------------------------------------------
-audio_power_benchmark:
- #BM_Power_PCM16/0 182.2241901911555 ns 181.4585420030687 ns 3856373
- #BM_Power_PCM16/1 182.3181837746463 ns 181.48175238693838 ns 3856614
- #BM_Power_PCM16/2 368.0452274813401 ns 366.3017598457351 ns 1912554
- #BM_Power_PCM16/3 554.2789131893876 ns 551.5793025147027 ns 1272715
- #BM_Power_PCM16/4 734.544472135247 ns 730.801475058127 ns 957386
- #BM_Power_PCM16/5 734.432967920759 ns 731.1678748377902 ns 957094
- #BM_Power_PCM16/6 733.8069997996705 ns 730.5650734403838 ns 958056
- #BM_Power_PCM16/7 731.1269183224047 ns 727.9060141898467 ns 957868
- #BM_Power_PCM16/8 915.4654669581546 ns 910.8886686647353 ns 768409
- #BM_Power_PCM16/9 916.2991591460016 ns 911.7267578221812 ns 766602
- #BM_Power_PCM16/10 916.2899150794203 ns 912.0552662308543 ns 767304
- #BM_Power_PCM16/11 1096.5960966896591 ns 1091.2493295035579 ns 640943
- #BM_Power_PCM16/12 1097.8511309779592 ns 1091.0261081397603 ns 640375
- #BM_Power_PCM16/13 1097.9944254893203 ns 1091.1471948274514 ns 641850
- #BM_Power_PCM16/14 1279.808649441679 ns 1273.7557073893663 ns 550602
- #BM_Power_PCM16/15 1458.9768502853626 ns 1451.9896279814072 ns 481777
- #BM_Power_PCM16/16 1458.2711695604448 ns 1451.9060239613882 ns 481942
- #BM_Power_PCM16/17 1819.4835449076445 ns 1812.0266937315016 ns 386233
- #BM_Power_PCM16/18 1820.3459564206057 ns 1812.1602221110884 ns 385933
- #BM_Power_PCM16/19 2181.369784896039 ns 2171.339075235256 ns 322309
- #BM_Power_PCM16/20 2905.2177625941545 ns 2891.0355549312485 ns 242048
- #BM_Power_PCM16/21 2364.132933388578 ns 2353.0530248033974 ns 297540
- #BM_Power_PCM16/22 4349.369949027189 ns 4330.079046346484 ns 161652
- #BM_Power_PCM24/0 726.8133229852367 ns 723.3602813367419 ns 967666
- #BM_Power_PCM24/1 726.8365225340423 ns 723.350181307502 ns 967693
- #BM_Power_PCM24/2 1453.469049006606 ns 1446.6400141353229 ns 483894
- #BM_Power_PCM24/3 2180.028380578979 ns 2169.873979821151 ns 322615
- #BM_Power_PCM24/4 2906.8811984734084 ns 2892.9251947515163 ns 241975
- #BM_Power_PCM24/5 2905.392592651063 ns 2892.431327900298 ns 241976
- #BM_Power_PCM24/6 2906.3533070213225 ns 2892.6723102671663 ns 241985
- #BM_Power_PCM24/7 2906.669533308557 ns 2892.9162117373876 ns 241979
- #BM_Power_PCM24/8 3634.8299686929113 ns 3616.1580312219085 ns 193582
- #BM_Power_PCM24/9 3646.2416687724785 ns 3615.1269559198017 ns 193579
- #BM_Power_PCM24/10 3641.1275370266394 ns 3615.927121699737 ns 193583
- #BM_Power_PCM24/11 4355.066508388703 ns 4339.191726899678 ns 161318
- #BM_Power_PCM24/12 4360.341786985582 ns 4339.590976436824 ns 161311
- #BM_Power_PCM24/13 4358.200731467333 ns 4339.247979171812 ns 161320
- #BM_Power_PCM24/14 5086.769189048403 ns 5062.498152152655 ns 138269
- #BM_Power_PCM24/15 5810.423653580803 ns 5785.767373623829 ns 120988
- #BM_Power_PCM24/16 5812.39702625609 ns 5786.230130917745 ns 120992
- #BM_Power_PCM24/17 7265.510668878706 ns 7232.779768427578 ns 96730
- #BM_Power_PCM24/18 7264.246771353663 ns 7231.77107139167 ns 96790
- #BM_Power_PCM24/19 8722.56894669041 ns 8684.567792613941 ns 80584
- #BM_Power_PCM24/20 11628.424309578584 ns 11575.683942450829 ns 60470
- #BM_Power_PCM24/21 9448.977902926888 ns 9406.63598973104 ns 74399
- #BM_Power_PCM24/22 17459.032419808307 ns 17375.671358355692 ns 40284
- #BM_Power_PCM32/0 180.64821432650652 ns 179.97205866590213 ns 3890437
- #BM_Power_PCM32/1 180.59328255286601 ns 180.00214854248506 ns 3890079
- #BM_Power_PCM32/2 367.3883165270478 ns 365.8843055170383 ns 1910990
- #BM_Power_PCM32/3 547.961739191163 ns 545.3393246259511 ns 1283585
- #BM_Power_PCM32/4 732.3382747450341 ns 729.2189395984298 ns 963220
- #BM_Power_PCM32/5 729.9914091395625 ns 726.6235749813377 ns 963233
- #BM_Power_PCM32/6 730.3738877114395 ns 726.9318824314772 ns 963217
- #BM_Power_PCM32/7 729.978907085062 ns 726.8274224511407 ns 963167
- #BM_Power_PCM32/8 912.4335730958729 ns 908.6304864721727 ns 770486
- #BM_Power_PCM32/9 915.197196607744 ns 910.899041307122 ns 770424
- #BM_Power_PCM32/10 912.5102563972879 ns 908.5750461421018 ns 770446
- #BM_Power_PCM32/11 1098.4029936865272 ns 1093.6704840140765 ns 641283
- #BM_Power_PCM32/12 1096.636110073461 ns 1091.8296782198036 ns 639971
- #BM_Power_PCM32/13 1096.700748278668 ns 1091.723232471321 ns 641206
- #BM_Power_PCM32/14 1278.092437845839 ns 1273.2958269366497 ns 549764
- #BM_Power_PCM32/15 1461.4543514169204 ns 1454.7416748932296 ns 481165
- #BM_Power_PCM32/16 1461.7273813451243 ns 1454.7411392391905 ns 481251
- #BM_Power_PCM32/17 1825.907444290439 ns 1817.8784066977225 ns 385087
- #BM_Power_PCM32/18 1825.381231595566 ns 1817.6661308667603 ns 385094
- #BM_Power_PCM32/19 2190.975242630068 ns 2180.597490904206 ns 320753
- #BM_Power_PCM32/20 2928.6714986526026 ns 2914.821292933981 ns 240136
- #BM_Power_PCM32/21 2372.874287158484 ns 2361.940219335247 ns 296350
- #BM_Power_PCM32/22 4388.368818947491 ns 4369.06687184351 ns 160187
- #BM_Power_FLOAT/0 168.00676835156023 ns 167.22008168082328 ns 4185805
- #BM_Power_FLOAT/1 167.9938065702452 ns 167.21758154288213 ns 4186049
- #BM_Power_FLOAT/2 351.86860213833324 ns 350.24728499757555 ns 1998617
- #BM_Power_FLOAT/3 532.7485100122135 ns 530.3216093916758 ns 1319977
- #BM_Power_FLOAT/4 716.865648210546 ns 713.9932490536078 ns 980307
- #BM_Power_FLOAT/5 717.0498470928635 ns 713.9746434393012 ns 980338
- #BM_Power_FLOAT/6 717.1775874821899 ns 713.9746211792758 ns 980345
- #BM_Power_FLOAT/7 717.286950836443 ns 713.9845488011172 ns 980377
- #BM_Power_FLOAT/8 899.506933295295 ns 895.4523624630964 ns 781663
- #BM_Power_FLOAT/9 899.6416845109553 ns 895.4465512345158 ns 781758
- #BM_Power_FLOAT/10 899.7137607146544 ns 895.4340975403337 ns 781769
- #BM_Power_FLOAT/11 1080.3481243192064 ns 1075.391753733447 ns 650937
- #BM_Power_FLOAT/12 1080.3754966280014 ns 1075.4162669575685 ns 650890
- #BM_Power_FLOAT/13 1080.4839543269304 ns 1075.418525654029 ns 650892
- #BM_Power_FLOAT/14 1260.9659337690678 ns 1255.3850934907016 ns 557649
- #BM_Power_FLOAT/15 1441.347092340657 ns 1435.2933780849712 ns 487729
- #BM_Power_FLOAT/16 1441.0252178558349 ns 1435.2747308851679 ns 487710
- #BM_Power_FLOAT/17 1803.174621574897 ns 1795.239832164671 ns 389906
- #BM_Power_FLOAT/18 1803.6167320222003 ns 1795.1778607571464 ns 389923
- #BM_Power_FLOAT/19 2164.5316780757366 ns 2155.090783692497 ns 324783
- #BM_Power_FLOAT/20 2895.0966589259547 ns 2881.551857432239 ns 242916
- #BM_Power_FLOAT/21 2346.6239754749067 ns 2335.133450977916 ns 299773
- #BM_Power_FLOAT/22 4336.788207490892 ns 4319.045067224524 ns 162069
+ #BM_Power_PCM16/0 180.97891633448947 ns 180.1272276967831 ns 3885852
+ #BM_Power_PCM16/1 180.947387663606 ns 180.12366729181554 ns 3886541
+ #BM_Power_PCM16/2 366.788606340951 ns 365.0981633489325 ns 1915715
+ #BM_Power_PCM16/3 548.7117372721175 ns 546.1077741464976 ns 1282070
+ #BM_Power_PCM16/4 729.3091325787765 ns 725.8914895381871 ns 964460
+ #BM_Power_PCM16/5 729.5107214636998 ns 725.9903921261499 ns 964001
+ #BM_Power_PCM16/6 729.3142860987979 ns 725.7270789310516 ns 964462
+ #BM_Power_PCM16/7 728.8285153846795 ns 725.6857445895254 ns 964518
+ #BM_Power_PCM16/8 910.266543588675 ns 905.9784034899902 ns 773412
+ #BM_Power_PCM16/9 909.7631943495237 ns 905.5980073450368 ns 773039
+ #BM_Power_PCM16/10 909.4817518460887 ns 905.3046969675422 ns 773201
+ #BM_Power_PCM16/11 1090.5752804035608 ns 1085.4067030965386 ns 645075
+ #BM_Power_PCM16/12 1090.1430777061082 ns 1085.3985691918976 ns 645090
+ #BM_Power_PCM16/13 1090.074475934731 ns 1085.1795632613716 ns 645054
+ #BM_Power_PCM16/14 1271.0181027106237 ns 1265.0655742741403 ns 553342
+ #BM_Power_PCM16/15 1450.921852464041 ns 1444.5727666390078 ns 484494
+ #BM_Power_PCM16/16 1451.2240897596057 ns 1444.6554862570126 ns 484538
+ #BM_Power_PCM16/17 1812.7695622285537 ns 1804.1137781191355 ns 387992
+ #BM_Power_PCM16/18 1812.6019333363201 ns 1804.0487200830873 ns 388033
+ #BM_Power_PCM16/19 2172.960068355248 ns 2163.5280680444976 ns 323553
+ #BM_Power_PCM16/20 2896.932275617628 ns 2882.070808558297 ns 242852
+ #BM_Power_PCM16/21 2354.0114820280833 ns 2343.273892215966 ns 298727
+ #BM_Power_PCM16/22 4341.411563591838 ns 4320.3065557379705 ns 162026
+ #BM_Power_PCM24/0 726.5703267242005 ns 723.3638876888978 ns 967598
+ #BM_Power_PCM24/1 726.8478512196587 ns 723.3996910023831 ns 967645
+ #BM_Power_PCM24/2 1453.522365501612 ns 1446.817841538075 ns 483826
+ #BM_Power_PCM24/3 2180.4783136728597 ns 2170.2972985542474 ns 322531
+ #BM_Power_PCM24/4 2907.3817241064085 ns 2893.78803763662 ns 241892
+ #BM_Power_PCM24/5 2907.93437008585 ns 2893.9130060933658 ns 241902
+ #BM_Power_PCM24/6 2907.476982094115 ns 2893.869563779754 ns 241942
+ #BM_Power_PCM24/7 2906.2742418195558 ns 2893.721987961765 ns 241896
+ #BM_Power_PCM24/8 3634.429566572946 ns 3617.2687167986146 ns 193516
+ #BM_Power_PCM24/9 3637.612631382197 ns 3617.0655648749726 ns 193503
+ #BM_Power_PCM24/10 3637.7966951345143 ns 3617.4579242709196 ns 193532
+ #BM_Power_PCM24/11 4360.471518019849 ns 4340.702796725793 ns 161260
+ #BM_Power_PCM24/12 4359.483557914242 ns 4340.688663859648 ns 161263
+ #BM_Power_PCM24/13 4361.102585926522 ns 4340.699237256582 ns 161260
+ #BM_Power_PCM24/14 5087.945822701728 ns 5064.266827288351 ns 138213
+ #BM_Power_PCM24/15 5814.759977572986 ns 5787.690635285585 ns 120922
+ #BM_Power_PCM24/16 5815.644749452265 ns 5787.2178931701865 ns 120940
+ #BM_Power_PCM24/17 7266.470565121046 ns 7234.610670139329 ns 96756
+ #BM_Power_PCM24/18 7268.525628728221 ns 7234.5110649425615 ns 96747
+ #BM_Power_PCM24/19 8716.083166871316 ns 8682.350888726245 ns 80621
+ #BM_Power_PCM24/20 11639.32870672943 ns 11580.26249875915 ns 60446
+ #BM_Power_PCM24/21 9446.785352486686 ns 9405.332101734537 ns 74429
+ #BM_Power_PCM24/22 17463.72223575373 ns 17383.809277838518 ns 40268
+ #BM_Power_PCM32/0 179.84309567370832 ns 179.0676549126645 ns 3908541
+ #BM_Power_PCM32/1 179.9404439204146 ns 179.0010033549121 ns 3911876
+ #BM_Power_PCM32/2 367.16608437351766 ns 365.42729213737346 ns 1915123
+ #BM_Power_PCM32/3 550.277695550067 ns 547.9387707497883 ns 1277543
+ #BM_Power_PCM32/4 731.4224075405458 ns 728.3597142669796 ns 960757
+ #BM_Power_PCM32/5 731.8896053180963 ns 728.7127664668686 ns 960626
+ #BM_Power_PCM32/6 731.6139555435336 ns 728.404691132571 ns 960493
+ #BM_Power_PCM32/7 731.4526007701458 ns 728.5303351299617 ns 952526
+ #BM_Power_PCM32/8 914.9957172964164 ns 910.4922825617709 ns 768908
+ #BM_Power_PCM32/9 914.9228864106034 ns 910.3200879900779 ns 768723
+ #BM_Power_PCM32/10 914.4295313745796 ns 910.2645292119196 ns 768796
+ #BM_Power_PCM32/11 1097.4819073401923 ns 1092.6099713796307 ns 640453
+ #BM_Power_PCM32/12 1098.341821642604 ns 1093.1572358279782 ns 640293
+ #BM_Power_PCM32/13 1098.2661898965437 ns 1092.878152356299 ns 636508
+ #BM_Power_PCM32/14 1280.2780676971747 ns 1274.5237323564186 ns 549208
+ #BM_Power_PCM32/15 1462.3197864285032 ns 1455.9130929404507 ns 480824
+ #BM_Power_PCM32/16 1462.7091956393392 ns 1455.9862690566056 ns 480739
+ #BM_Power_PCM32/17 1827.1057526954876 ns 1818.949706155908 ns 384898
+ #BM_Power_PCM32/18 1827.7309048929412 ns 1819.2175362902751 ns 384745
+ #BM_Power_PCM32/19 2191.6910336275487 ns 2182.0570090137303 ns 320844
+ #BM_Power_PCM32/20 2932.1382076410487 ns 2917.764512203257 ns 239936
+ #BM_Power_PCM32/21 2384.572833177335 ns 2374.048604969875 ns 296266
+ #BM_Power_PCM32/22 4396.192134082954 ns 4376.258719439749 ns 159930
+ #BM_Power_FLOAT/0 167.25444234000543 ns 166.47183513094348 ns 4204937
+ #BM_Power_FLOAT/1 167.24129258362885 ns 166.4590919078933 ns 4205036
+ #BM_Power_FLOAT/2 351.4320774910175 ns 349.8888578977785 ns 2000574
+ #BM_Power_FLOAT/3 532.5129107539586 ns 530.0978074251175 ns 1320411
+ #BM_Power_FLOAT/4 716.8967761364072 ns 713.6935400332438 ns 980810
+ #BM_Power_FLOAT/5 716.7889045409055 ns 713.7320038176421 ns 980708
+ #BM_Power_FLOAT/6 717.0752796480707 ns 713.6972366395144 ns 980690
+ #BM_Power_FLOAT/7 716.6102092628516 ns 713.7024690577491 ns 980698
+ #BM_Power_FLOAT/8 899.5296958205339 ns 894.9512572444829 ns 782147
+ #BM_Power_FLOAT/9 899.2820069645962 ns 894.9592230725831 ns 782158
+ #BM_Power_FLOAT/10 899.3434261268515 ns 894.9564490846954 ns 782142
+ #BM_Power_FLOAT/11 1079.2320050706119 ns 1074.9236482969181 ns 651197
+ #BM_Power_FLOAT/12 1079.995203673193 ns 1074.9423856936505 ns 651123
+ #BM_Power_FLOAT/13 1080.2950176525237 ns 1074.9553632314376 ns 651100
+ #BM_Power_FLOAT/14 1260.4052233815107 ns 1254.9116087466343 ns 557838
+ #BM_Power_FLOAT/15 1441.558180305023 ns 1434.8599962279143 ns 487794
+ #BM_Power_FLOAT/16 1442.0232631999454 ns 1434.885356768729 ns 487809
+ #BM_Power_FLOAT/17 1802.938510952263 ns 1794.8471734733112 ns 390037
+ #BM_Power_FLOAT/18 1802.3492608362737 ns 1794.8067419071836 ns 390038
+ #BM_Power_FLOAT/19 2163.5362062414747 ns 2154.6858791842915 ns 324875
+ #BM_Power_FLOAT/20 2894.779758388722 ns 2882.248193812651 ns 242915
+ #BM_Power_FLOAT/21 2345.738662867257 ns 2334.692247141554 ns 299812
+ #BM_Power_FLOAT/22 4341.005108230952 ns 4318.48899075213 ns 162091
-Pixel 7 (without NEON)
+Pixel 7 (generic intrinsics)
audio_power_benchmark:
- #BM_Power_PCM16/0 710.0176225862541 ns 706.5369398066563 ns 990774
- #BM_Power_PCM16/1 709.5382576613781 ns 706.4966294743109 ns 990795
- #BM_Power_PCM16/2 1432.22333510522 ns 1425.8151113120955 ns 490917
- #BM_Power_PCM16/3 2154.9135527473695 ns 2144.662213834012 ns 326384
- #BM_Power_PCM16/4 2878.298555528755 ns 2864.2879572796446 ns 244380
- #BM_Power_PCM16/5 2876.5968518680247 ns 2864.1508803227484 ns 244399
- #BM_Power_PCM16/6 2876.786253481042 ns 2864.19396835658 ns 244411
- #BM_Power_PCM16/7 2877.9274599737573 ns 2864.401922234998 ns 244403
- #BM_Power_PCM16/8 3599.2718202583505 ns 3583.2360171183445 ns 195346
- #BM_Power_PCM16/9 3598.8521185858167 ns 3583.090189695244 ns 195366
- #BM_Power_PCM16/10 3607.3259044670376 ns 3583.0856094265077 ns 195364
- #BM_Power_PCM16/11 4322.178672831243 ns 4301.900640355943 ns 162722
- #BM_Power_PCM16/12 4322.413127245351 ns 4301.978619057853 ns 162715
- #BM_Power_PCM16/13 4321.293715661808 ns 4301.96961074716 ns 162722
- #BM_Power_PCM16/14 5045.423164118614 ns 5021.725069589923 ns 139388
- #BM_Power_PCM16/15 5771.999819383823 ns 5746.762121734573 ns 121806
- #BM_Power_PCM16/16 5772.041574304546 ns 5746.705014531301 ns 121806
- #BM_Power_PCM16/17 7215.511103357703 ns 7184.062689844849 ns 97448
- #BM_Power_PCM16/18 7218.193087948947 ns 7183.934522282532 ns 97453
- #BM_Power_PCM16/19 8664.970089320619 ns 8623.758558669555 ns 81175
- #BM_Power_PCM16/20 11552.535965953697 ns 11497.46184930272 ns 60877
- #BM_Power_PCM16/21 9388.44799135153 ns 9344.99919894262 ns 74901
- #BM_Power_PCM16/22 17318.294772901812 ns 17250.841042955355 ns 40577
- #BM_Power_PCM24/0 726.1379773475074 ns 722.8207160621033 ns 968659
- #BM_Power_PCM24/1 726.2071634525643 ns 722.8022114316173 ns 968332
- #BM_Power_PCM24/2 1453.0095285897585 ns 1446.1379218461464 ns 484122
- #BM_Power_PCM24/3 2178.8762811180095 ns 2169.3027238608242 ns 322667
- #BM_Power_PCM24/4 2906.0304321240224 ns 2892.507449982226 ns 242014
- #BM_Power_PCM24/5 2905.6813512412673 ns 2892.5476900826334 ns 242000
- #BM_Power_PCM24/6 2905.133137183084 ns 2892.546958357953 ns 241991
- #BM_Power_PCM24/7 2904.9541637755087 ns 2892.5035497041163 ns 241992
- #BM_Power_PCM24/8 3631.119642357674 ns 3615.6040123552484 ns 193602
- #BM_Power_PCM24/9 3630.0929723231857 ns 3615.540825181041 ns 193606
- #BM_Power_PCM24/10 3633.1367688699947 ns 3615.7981963182615 ns 193604
- #BM_Power_PCM24/11 4359.021829536821 ns 4338.925567586662 ns 161341
- #BM_Power_PCM24/12 4359.9166083010205 ns 4338.934589518691 ns 161335
- #BM_Power_PCM24/13 4359.522224219495 ns 4338.965623895894 ns 161333
- #BM_Power_PCM24/14 5084.233240033068 ns 5062.102226947575 ns 138261
- #BM_Power_PCM24/15 5812.489557247782 ns 5785.279710396485 ns 120993
- #BM_Power_PCM24/16 5813.233703335974 ns 5785.328950087162 ns 120991
- #BM_Power_PCM24/17 7263.762055413714 ns 7236.184253515309 ns 96720
- #BM_Power_PCM24/18 7268.666932026325 ns 7235.481695528568 ns 96725
- #BM_Power_PCM24/19 8724.851571115321 ns 8682.925320365706 ns 80611
- #BM_Power_PCM24/20 11627.03445538921 ns 11574.680163349089 ns 60484
- #BM_Power_PCM24/21 9446.979585525633 ns 9401.467625609412 ns 74457
- #BM_Power_PCM24/22 17453.76751577347 ns 17376.240230398802 ns 40278
- #BM_Power_PCM32/0 707.1022650419995 ns 704.1584563736747 ns 993712
- #BM_Power_PCM32/1 707.6617193193007 ns 704.0883302075107 ns 994405
- #BM_Power_PCM32/2 1429.6476727153045 ns 1423.189700138329 ns 491560
- #BM_Power_PCM32/3 2155.1067918033104 ns 2145.802709002737 ns 326467
- #BM_Power_PCM32/4 2879.5403983345095 ns 2866.2732763350723 ns 244218
- #BM_Power_PCM32/5 2879.848013559709 ns 2866.3107704151535 ns 244206
- #BM_Power_PCM32/6 2877.9152711001893 ns 2866.44009237345 ns 244226
- #BM_Power_PCM32/7 2879.7659499561555 ns 2866.283334084033 ns 244217
- #BM_Power_PCM32/8 3602.6533810677543 ns 3585.3524729960113 ns 195249
- #BM_Power_PCM32/9 3600.7437134450106 ns 3585.2641581906905 ns 195258
- #BM_Power_PCM32/10 3601.967841229724 ns 3585.171620998731 ns 195250
- #BM_Power_PCM32/11 4323.25945603472 ns 4304.413593570312 ns 162621
- #BM_Power_PCM32/12 4324.284355266607 ns 4304.535958725075 ns 162617
- #BM_Power_PCM32/13 4324.75273181278 ns 4304.458329490036 ns 162621
- #BM_Power_PCM32/14 5045.495830640872 ns 5023.208295658413 ns 139350
- #BM_Power_PCM32/15 5766.477377491339 ns 5741.618567186731 ns 121914
- #BM_Power_PCM32/16 5768.763821447558 ns 5742.042194369716 ns 121912
- #BM_Power_PCM32/17 7213.064382792687 ns 7179.561977537295 ns 97495
- #BM_Power_PCM32/18 7213.149411254583 ns 7179.568925904638 ns 97496
- #BM_Power_PCM32/19 8659.201942841833 ns 8618.064860073133 ns 81221
- #BM_Power_PCM32/20 11554.875168393679 ns 11501.10802418425 ns 60866
- #BM_Power_PCM32/21 9382.646321933518 ns 9338.843445662977 ns 74958
- #BM_Power_PCM32/22 17333.68259326782 ns 17248.919274555337 ns 40582
- #BM_Power_FLOAT/0 701.9890055797536 ns 698.8782105756134 ns 1001417
- #BM_Power_FLOAT/1 702.0201509805795 ns 698.8716249878825 ns 1001589
- #BM_Power_FLOAT/2 1424.347226948063 ns 1417.6723436056996 ns 493752
- #BM_Power_FLOAT/3 2146.7867642580077 ns 2136.673361251187 ns 327628
- #BM_Power_FLOAT/4 2871.9139060421503 ns 2858.4790952007725 ns 244872
- #BM_Power_FLOAT/5 2871.9435091015116 ns 2858.480589164847 ns 244889
- #BM_Power_FLOAT/6 2871.525335064572 ns 2858.4684700136518 ns 244878
- #BM_Power_FLOAT/7 2872.250985165238 ns 2858.5650821004274 ns 244883
- #BM_Power_FLOAT/8 3594.7066331346323 ns 3577.8518291342452 ns 195639
- #BM_Power_FLOAT/9 3593.90243503874 ns 3578.0740988734437 ns 195644
- #BM_Power_FLOAT/10 3594.786827430224 ns 3577.8794729336846 ns 195649
- #BM_Power_FLOAT/11 4316.386219808971 ns 4296.510088082756 ns 162915
- #BM_Power_FLOAT/12 4315.539684876495 ns 4296.500868519076 ns 162921
- #BM_Power_FLOAT/13 4316.70090107445 ns 4296.524116722725 ns 162916
- #BM_Power_FLOAT/14 5038.922920398046 ns 5015.71531847824 ns 139570
- #BM_Power_FLOAT/15 5760.129063966319 ns 5734.677856142363 ns 122079
- #BM_Power_FLOAT/16 5761.676214194108 ns 5734.530059972358 ns 122056
- #BM_Power_FLOAT/17 7205.371829030732 ns 7172.0947809516465 ns 97604
- #BM_Power_FLOAT/18 7204.279098831734 ns 7172.448840759307 ns 97607
- #BM_Power_FLOAT/19 8648.461918826724 ns 8610.035399754119 ns 81300
- #BM_Power_FLOAT/20 11543.84161358285 ns 11492.703027516904 ns 60908
- #BM_Power_FLOAT/21 9375.01953072149 ns 9331.312011731832 ns 75010
- #BM_Power_FLOAT/22 17322.77400363915 ns 17240.430710872355 ns 40598
+ #BM_Power_PCM16/0 337.760142649551 ns 336.25662704492527 ns 2081433
+ #BM_Power_PCM16/1 337.9351210323131 ns 336.3553796166739 ns 2081152
+ #BM_Power_PCM16/2 672.5364912371965 ns 669.4070783764836 ns 1045607
+ #BM_Power_PCM16/3 1006.4414502095067 ns 1001.6641972800023 ns 698824
+ #BM_Power_PCM16/4 1339.75865716885 ns 1333.3746327354206 ns 524826
+ #BM_Power_PCM16/5 1339.747932594017 ns 1333.3662572435896 ns 525292
+ #BM_Power_PCM16/6 1339.081792726286 ns 1333.2895640083038 ns 525010
+ #BM_Power_PCM16/7 1338.8341052706744 ns 1333.1123506021647 ns 524857
+ #BM_Power_PCM16/8 1674.8266277119628 ns 1667.4998296698461 ns 419773
+ #BM_Power_PCM16/9 1675.1191347994438 ns 1667.2425555624354 ns 419978
+ #BM_Power_PCM16/10 1674.71266726399 ns 1666.9600314300715 ns 419980
+ #BM_Power_PCM16/11 2010.2484537350663 ns 2000.7651024934537 ns 349876
+ #BM_Power_PCM16/12 2009.7556108023364 ns 2000.7828645744019 ns 349860
+ #BM_Power_PCM16/13 2010.480003104438 ns 2000.914753011257 ns 349854
+ #BM_Power_PCM16/14 2344.500011676101 ns 2333.4616815513364 ns 299973
+ #BM_Power_PCM16/15 2679.159956909422 ns 2667.7558284453257 ns 262377
+ #BM_Power_PCM16/16 2680.335192003616 ns 2667.7298096639197 ns 262378
+ #BM_Power_PCM16/17 3349.3067194938394 ns 3333.070394636596 ns 210016
+ #BM_Power_PCM16/18 3347.5797958892135 ns 3332.993186265871 ns 210017
+ #BM_Power_PCM16/19 4017.7712421677643 ns 4000.1111676171067 ns 175006
+ #BM_Power_PCM16/20 5353.909304200122 ns 5332.241398226324 ns 131252
+ #BM_Power_PCM16/21 4351.829067676431 ns 4333.01033665303 ns 161561
+ #BM_Power_PCM16/22 8033.3449012838255 ns 7998.6481625377255 ns 87512
+ #BM_Power_PCM24/0 726.0724740108452 ns 722.793010652718 ns 968016
+ #BM_Power_PCM24/1 726.5141612892924 ns 723.0428633830344 ns 968379
+ #BM_Power_PCM24/2 1453.0771079215267 ns 1446.3117483448088 ns 483932
+ #BM_Power_PCM24/3 2179.805517686665 ns 2169.532832665915 ns 322636
+ #BM_Power_PCM24/4 2904.8858799182244 ns 2892.571683017743 ns 241982
+ #BM_Power_PCM24/5 2905.290520956667 ns 2892.6407436793943 ns 241986
+ #BM_Power_PCM24/6 2905.7997231004174 ns 2892.3390330578545 ns 242000
+ #BM_Power_PCM24/7 2906.2596490773803 ns 2892.7177675295025 ns 241992
+ #BM_Power_PCM24/8 3632.076809461512 ns 3615.545919869441 ns 193609
+ #BM_Power_PCM24/9 3632.5518469824924 ns 3615.6390283459427 ns 193608
+ #BM_Power_PCM24/10 3631.5230189943823 ns 3615.826974034231 ns 193601
+ #BM_Power_PCM24/11 4358.713629609595 ns 4338.867286915848 ns 161333
+ #BM_Power_PCM24/12 4359.231924050327 ns 4338.824437653807 ns 161333
+ #BM_Power_PCM24/13 4356.325355513422 ns 4339.28872069524 ns 161322
+ #BM_Power_PCM24/14 5086.698249103073 ns 5062.681608123064 ns 138273
+ #BM_Power_PCM24/15 5812.5060886912215 ns 5785.645307165088 ns 120961
+ #BM_Power_PCM24/16 5812.5832141796 ns 5785.508699569401 ns 120983
+ #BM_Power_PCM24/17 7263.989452119297 ns 7231.864374560889 ns 96796
+ #BM_Power_PCM24/18 7263.645282618586 ns 7231.993934070503 ns 96770
+ #BM_Power_PCM24/19 8717.248425410799 ns 8678.296803709562 ns 80656
+ #BM_Power_PCM24/20 11627.892071566856 ns 11574.479277610539 ns 60466
+ #BM_Power_PCM24/21 9446.526244343248 ns 9406.349543664368 ns 74397
+ #BM_Power_PCM24/22 17447.99081596151 ns 17374.784496239492 ns 40287
+ #BM_Power_PCM32/0 366.5107706071915 ns 364.8308462714918 ns 1919065
+ #BM_Power_PCM32/1 366.5168704771794 ns 364.7554321738571 ns 1918707
+ #BM_Power_PCM32/2 730.2924269058933 ns 727.1057881827118 ns 962962
+ #BM_Power_PCM32/3 1093.9912603896798 ns 1088.7929435922993 ns 642820
+ #BM_Power_PCM32/4 1456.8093080845226 ns 1449.9832478951175 ns 482805
+ #BM_Power_PCM32/5 1456.1232191254142 ns 1449.855881567137 ns 482839
+ #BM_Power_PCM32/6 1456.2008645317271 ns 1449.290242604125 ns 482803
+ #BM_Power_PCM32/7 1457.0090571482128 ns 1450.1991782065193 ns 482603
+ #BM_Power_PCM32/8 1819.8793533353248 ns 1811.1948393741498 ns 386426
+ #BM_Power_PCM32/9 1819.2882280461354 ns 1811.0842554160467 ns 386444
+ #BM_Power_PCM32/10 1822.4888939503587 ns 1811.2359906431732 ns 386456
+ #BM_Power_PCM32/11 2182.958268570418 ns 2172.377140481356 ns 322299
+ #BM_Power_PCM32/12 2181.462500754493 ns 2171.936179393034 ns 322153
+ #BM_Power_PCM32/13 2181.9423435499857 ns 2172.6346584016715 ns 322323
+ #BM_Power_PCM32/14 2545.0172319340754 ns 2533.932252353998 ns 276231
+ #BM_Power_PCM32/15 2910.0457980163624 ns 2897.3537004237564 ns 241648
+ #BM_Power_PCM32/16 2911.5361569110396 ns 2897.792890320363 ns 241558
+ #BM_Power_PCM32/17 3627.9648736853173 ns 3610.617067822403 ns 193815
+ #BM_Power_PCM32/18 3626.573311653762 ns 3610.4587441726208 ns 193912
+ #BM_Power_PCM32/19 4371.516904489751 ns 4351.228854700868 ns 160875
+ #BM_Power_PCM32/20 5833.295410225075 ns 5806.0874899420005 ns 120551
+ #BM_Power_PCM32/21 4733.5978237221925 ns 4713.533180254132 ns 148507
+ #BM_Power_PCM32/22 8761.069766861208 ns 8719.434463509126 ns 80253
+ #BM_Power_FLOAT/0 309.7573236829997 ns 308.3100559643444 ns 2270374
+ #BM_Power_FLOAT/1 309.82671751979507 ns 308.31727146129305 ns 2269889
+ #BM_Power_FLOAT/2 616.5533244216624 ns 613.6735468134156 ns 1140528
+ #BM_Power_FLOAT/3 924.1979909165235 ns 919.7778739898862 ns 761050
+ #BM_Power_FLOAT/4 1229.228159640607 ns 1223.550260462179 ns 572060
+ #BM_Power_FLOAT/5 1229.1571063156637 ns 1223.5816435152462 ns 572103
+ #BM_Power_FLOAT/6 1229.316886623843 ns 1223.9691527852942 ns 572110
+ #BM_Power_FLOAT/7 1230.6891998748167 ns 1223.8458668046858 ns 571882
+ #BM_Power_FLOAT/8 1537.1050916942645 ns 1530.2548960451468 ns 457410
+ #BM_Power_FLOAT/9 1537.624473698849 ns 1530.2998740768824 ns 457422
+ #BM_Power_FLOAT/10 1537.8752661993187 ns 1530.7318127550295 ns 457326
+ #BM_Power_FLOAT/11 1839.3751016921826 ns 1830.8065497602443 ns 382243
+ #BM_Power_FLOAT/12 1840.0720637536492 ns 1831.6316314246699 ns 382120
+ #BM_Power_FLOAT/13 1839.6525013051378 ns 1831.4300958752372 ns 382059
+ #BM_Power_FLOAT/14 2148.22182778337 ns 2138.606070233867 ns 327335
+ #BM_Power_FLOAT/15 2452.349344804406 ns 2441.433373571317 ns 286628
+ #BM_Power_FLOAT/16 2452.055538106065 ns 2441.4724420438542 ns 286596
+ #BM_Power_FLOAT/17 3069.5813045295163 ns 3056.235010106019 ns 229071
+ #BM_Power_FLOAT/18 3068.6837668400217 ns 3055.440662658508 ns 229198
+ #BM_Power_FLOAT/19 3672.409374171318 ns 3656.6498973574044 ns 191441
+ #BM_Power_FLOAT/20 4929.296286126466 ns 4905.616779854631 ns 142683
+ #BM_Power_FLOAT/21 4001.2961863843757 ns 3975.4695817490483 ns 176210
+ #BM_Power_FLOAT/22 7390.10927792279 ns 7357.815966854894 ns 95097
*/
diff --git a/audio_utils/include/audio_utils/intrinsic_utils.h b/audio_utils/include/audio_utils/intrinsic_utils.h
index 0c333e0e..beedd681 100644
--- a/audio_utils/include/audio_utils/intrinsic_utils.h
+++ b/audio_utils/include/audio_utils/intrinsic_utils.h
@@ -49,8 +49,54 @@ inline constexpr bool dependent_false_v = false;
template<typename T, size_t N>
struct internal_array_t {
T v[N];
+ static constexpr size_t size() { return N; }
};
+// Detect if a value of type T can be indexed directly with operator[] (array-like).
+// This is more advanced than std::is_array and works with neon intrinsics.
+template<typename T>
+concept is_array_like = requires(T a) {
+ a[0]; // satisfied when indexing the first element is a well-formed expression
+};
+
+// Vector convert from type T to type S: each element of in is assigned to the same index of the result.
+template <typename S, typename T>
+inline S vconvert(const T& in) {
+ S out;
+
+ if constexpr (is_array_like<S>) {
+ if constexpr (is_array_like<T>) {
+#pragma unroll
+ // element count is computed with sizeof; neon intrinsics need sizeof.
+ for (size_t i = 0; i < sizeof(in) / sizeof(in[0]); ++i) {
+ out[i] = in[i];
+ }
+ } else { /* constexpr: T is not directly indexable, so bind its single member */
+ const auto& [inv] = in;
+#pragma unroll
+ for (size_t i = 0; i < T::size(); ++i) {
+ out[i] = inv[i];
+ }
+ }
+ } else { /* constexpr: S is not directly indexable, so write through its single member */
+ auto& [outv] = out;
+ if constexpr (is_array_like<T>) {
+#pragma unroll
+ // element count is computed with sizeof; neon intrinsics need sizeof.
+ for (size_t i = 0; i < sizeof(in) / sizeof(in[0]); ++i) {
+ outv[i] = in[i];
+ }
+ } else { /* constexpr: neither S nor T is directly indexable */
+ const auto& [inv] = in;
+#pragma unroll
+ for (size_t i = 0; i < T::size(); ++i) {
+ outv[i] = inv[i];
+ }
+ }
+ }
+ return out;
+}
+
/*
Generalized template functions for the Neon instruction set.
@@ -117,6 +163,46 @@ static inline T vadd(T a, T b) {
}
}
+// Add across the vector: returns the sum of all elements of a (a scalar float or double is returned as is).
+template<typename T>
+inline auto vaddv(const T& a) {
+ if constexpr (std::is_same_v<T, float> || std::is_same_v<T, double>) {
+ return a;
+
+#ifdef USE_NEON
+ } else if constexpr (std::is_same_v<T, float32x2_t>) {
+ return vaddv_f32(a);
+#if defined(__aarch64__)
+ } else if constexpr (std::is_same_v<T, float32x4_t>) {
+ return vaddvq_f32(a);
+ } else if constexpr (std::is_same_v<T, float64x2_t>) {
+ return vaddvq_f64(a);
+#endif
+#endif // USE_NEON
+ } else if constexpr (is_array_like<T>) {
+ using ret_t = std::decay_t<decltype(a[0])>;
+
+ ret_t ret{};
+ // array_like is not the same as an array, so we use sizeof here
+ // to get the element count and handle neon intrinsics.
+#pragma unroll
+ for (size_t i = 0; i < sizeof(a) / sizeof(a[0]); ++i) {
+ ret += a[i];
+ }
+ return ret;
+ } else /* constexpr: T is not directly indexable, so sum over its single member */ {
+ const auto &[aval] = a;
+ using ret_t = std::decay_t<decltype(aval[0])>;
+ ret_t ret{};
+
+#pragma unroll
+ for (size_t i = 0; i < std::size(aval); ++i) {
+ ret += aval[i];
+ }
+ return ret;
+ }
+}
+
// duplicate float into all elements.
template<typename T, typename F>
static inline T vdupn(F f) {
@@ -264,7 +350,7 @@ static inline T vmla(T a, F b, T c) {
// fused multiply-add a + b * c
template<typename T>
-static inline T vmla(T a, T b, T c) {
+inline T vmla(const T& a, const T& b, const T& c) {
if constexpr (std::is_same_v<T, float> || std::is_same_v<T, double>) {
return a + b * c;
diff --git a/audio_utils/power.cpp b/audio_utils/power.cpp
index 58fa917f..699358f0 100644
--- a/audio_utils/power.cpp
+++ b/audio_utils/power.cpp
@@ -18,14 +18,12 @@
#define LOG_TAG "audio_utils_power"
#include <log/log.h>
-#include <algorithm>
-#include <math.h>
-
#include <audio_utils/power.h>
+
+#include <audio_utils/intrinsic_utils.h>
#include <audio_utils/primitives.h>
#if defined(__aarch64__) || defined(__ARM_NEON__)
-#include <arm_neon.h>
#define USE_NEON
#endif
@@ -154,44 +152,31 @@ inline void energy(const void *amplitudes, size_t size, size_t numChannels, floa
energyRef<FORMAT>(amplitudes, size, numChannels, out);
}
-// fast float power computation for ARM processors that support NEON.
+// TODO(b/323611666) in some cases having a large kVectorWidth generic internal array is
+// faster than the NEON intrinsic version. Optimize this.
#ifdef USE_NEON
+// The type conversion appears faster if we use a neon accumulator type.
+constexpr size_t kVectorWidth = 4;
+using AccumulatorType = float32x4_t;
+#else
+constexpr size_t kVectorWidth = 4;
+using AccumulatorType = android::audio_utils::intrinsics::internal_array_t<float, kVectorWidth>;
+#endif
-template <typename T>
-float32x4_t convertToFloatVectorAmplitude(T vamplitude) = delete;
-
-template <>
-float32x4_t convertToFloatVectorAmplitude<float32x4_t>(float32x4_t vamplitude) {
- return vamplitude;
-}
-
-template <>
-float32x4_t convertToFloatVectorAmplitude<int16x4_t>(int16x4_t vamplitude) {
- const int32x4_t iamplitude = vmovl_s16(vamplitude); // expand s16 to s32 first
- return vcvtq_f32_s32(iamplitude);
-}
-
-template <>
-float32x4_t convertToFloatVectorAmplitude<int32x4_t>(int32x4_t vamplitude) {
- return vcvtq_f32_s32(vamplitude);
-}
-
-template <typename Vector, typename Scalar>
+template <typename Scalar, size_t N>
inline float energyMonoVector(const void *amplitudes, size_t size)
-{
- static_assert(sizeof(Vector) % sizeof(Scalar) == 0,
- "Vector size must be a multiple of scalar size");
- const size_t vectorLength = sizeof(Vector) / sizeof(Scalar); // typically 4 (a const)
-
- // check pointer validity, must be aligned with scalar type.
+{ // check pointer validity, must be aligned with scalar type.
const Scalar *samplitudes = reinterpret_cast<const Scalar *>(amplitudes);
LOG_ALWAYS_FATAL_IF((uintptr_t)samplitudes % alignof(Scalar) != 0,
"Non-element aligned address: %p %zu", samplitudes, alignof(Scalar));
float accumulator = 0;
+ // Loading the input data through the generic internal_array_t type appears to be fine.
+ using Vector = android::audio_utils::intrinsics::internal_array_t<Scalar, N>;
+
// handle pointer unaligned to vector type.
- while ((uintptr_t)samplitudes % alignof(Vector) != 0 /* compiler optimized */ && size > 0) {
+ while ((uintptr_t)samplitudes % sizeof(Vector) != 0 /* compiler optimized */ && size > 0) {
const float amp = (float)*samplitudes++;
accumulator += amp * amp;
--size;
@@ -201,21 +186,18 @@ inline float energyMonoVector(const void *amplitudes, size_t size)
const Vector *vamplitudes = reinterpret_cast<const Vector *>(samplitudes);
// clear vector accumulator
- float32x4_t accum = vdupq_n_f32(0);
+ AccumulatorType accum{};
// iterate over array getting sum of squares in vectorLength lanes.
size_t i;
- for (i = 0; i < size - size % vectorLength /* compiler optimized */; i += vectorLength) {
- const float32x4_t famplitude = convertToFloatVectorAmplitude(*vamplitudes++);
- accum = vmlaq_f32(accum, famplitude, famplitude);
+ const size_t limit = size - size % N;
+ for (i = 0; i < limit; i += N) {
+ const auto famplitude = vconvert<AccumulatorType>(*vamplitudes++);
+ accum = android::audio_utils::intrinsics::vmla(accum, famplitude, famplitude);
}
- // narrow vectorLength lanes of floats
- float32x2_t accum2 = vadd_f32(vget_low_f32(accum), vget_high_f32(accum)); // get stereo volume
- accum2 = vpadd_f32(accum2, accum2); // combine to mono
-
- // accumulate vector
- accumulator += vget_lane_f32(accum2, 0);
+ // add all components of the vector.
+ accumulator += android::audio_utils::intrinsics::vaddv(accum);
// accumulate any trailing elements too small for vector size
for (; i < size; ++i) {
@@ -228,13 +210,13 @@ inline float energyMonoVector(const void *amplitudes, size_t size)
template <>
inline float energyMono<AUDIO_FORMAT_PCM_FLOAT>(const void *amplitudes, size_t size)
{
- return energyMonoVector<float32x4_t, float>(amplitudes, size);
+ return energyMonoVector<float, kVectorWidth>(amplitudes, size);
}
template <>
inline float energyMono<AUDIO_FORMAT_PCM_16_BIT>(const void *amplitudes, size_t size)
{
- return energyMonoVector<int16x4_t, int16_t>(amplitudes, size)
+ return energyMonoVector<int16_t, kVectorWidth>(amplitudes, size)
* normalizeEnergy<AUDIO_FORMAT_PCM_16_BIT>();
}
@@ -242,7 +224,7 @@ inline float energyMono<AUDIO_FORMAT_PCM_16_BIT>(const void *amplitudes, size_t
template <>
inline float energyMono<AUDIO_FORMAT_PCM_32_BIT>(const void *amplitudes, size_t size)
{
- return energyMonoVector<int32x4_t, int32_t>(amplitudes, size)
+ return energyMonoVector<int32_t, kVectorWidth>(amplitudes, size)
* normalizeEnergy<AUDIO_FORMAT_PCM_32_BIT>();
}
@@ -250,12 +232,10 @@ inline float energyMono<AUDIO_FORMAT_PCM_32_BIT>(const void *amplitudes, size_t
template <>
inline float energyMono<AUDIO_FORMAT_PCM_8_24_BIT>(const void *amplitudes, size_t size)
{
- return energyMonoVector<int32x4_t, int32_t>(amplitudes, size)
+ return energyMonoVector<int32_t, kVectorWidth>(amplitudes, size)
* normalizeEnergy<AUDIO_FORMAT_PCM_8_24_BIT>();
}
-#endif // USE_NEON
-
} // namespace
float audio_utils_compute_energy_mono(const void *buffer, audio_format_t format, size_t samples)