diff options
author | Andy Hung <hunga@google.com> | 2024-02-01 18:25:45 -0800 |
---|---|---|
committer | Andy Hung <hunga@google.com> | 2024-03-12 14:07:24 -0700 |
commit | 4a0897fd6085f6e2e1618d4c466982a4af4b445e (patch) | |
tree | 87be6b5066ad1b53cbd2aff26d5d52c03d6f1ce3 | |
parent | f33da679600164b3f8872dc901fdea576eeed63b (diff) | |
download | media-4a0897fd6085f6e2e1618d4c466982a4af4b445e.tar.gz |
power: Use intrinsic optimizations
Use general intrinsic_utils library for more
portable optimization.
Test: atest audio_power_benchmark
Test: atest power_tests
Bug: 323610467
Merged-In: I9a7df0257fc83ba7bf22316247170d1bd95c2700
Change-Id: I9a7df0257fc83ba7bf22316247170d1bd95c2700
-rw-r--r-- | audio_utils/benchmarks/audio_power_benchmark.cpp | 371 | ||||
-rw-r--r-- | audio_utils/include/audio_utils/intrinsic_utils.h | 88 | ||||
-rw-r--r-- | audio_utils/power.cpp | 76 |
3 files changed, 300 insertions, 235 deletions
diff --git a/audio_utils/benchmarks/audio_power_benchmark.cpp b/audio_utils/benchmarks/audio_power_benchmark.cpp index 130aec09..b355d976 100644 --- a/audio_utils/benchmarks/audio_power_benchmark.cpp +++ b/audio_utils/benchmarks/audio_power_benchmark.cpp @@ -28,194 +28,193 @@ Pixel 7 (USE_NEON code) 1024 frames ------------------------------------------------------------------------------------ Benchmark Time CPU Iteration ------------------------------------------------------------------------------------ -audio_power_benchmark: - #BM_Power_PCM16/0 182.2241901911555 ns 181.4585420030687 ns 3856373 - #BM_Power_PCM16/1 182.3181837746463 ns 181.48175238693838 ns 3856614 - #BM_Power_PCM16/2 368.0452274813401 ns 366.3017598457351 ns 1912554 - #BM_Power_PCM16/3 554.2789131893876 ns 551.5793025147027 ns 1272715 - #BM_Power_PCM16/4 734.544472135247 ns 730.801475058127 ns 957386 - #BM_Power_PCM16/5 734.432967920759 ns 731.1678748377902 ns 957094 - #BM_Power_PCM16/6 733.8069997996705 ns 730.5650734403838 ns 958056 - #BM_Power_PCM16/7 731.1269183224047 ns 727.9060141898467 ns 957868 - #BM_Power_PCM16/8 915.4654669581546 ns 910.8886686647353 ns 768409 - #BM_Power_PCM16/9 916.2991591460016 ns 911.7267578221812 ns 766602 - #BM_Power_PCM16/10 916.2899150794203 ns 912.0552662308543 ns 767304 - #BM_Power_PCM16/11 1096.5960966896591 ns 1091.2493295035579 ns 640943 - #BM_Power_PCM16/12 1097.8511309779592 ns 1091.0261081397603 ns 640375 - #BM_Power_PCM16/13 1097.9944254893203 ns 1091.1471948274514 ns 641850 - #BM_Power_PCM16/14 1279.808649441679 ns 1273.7557073893663 ns 550602 - #BM_Power_PCM16/15 1458.9768502853626 ns 1451.9896279814072 ns 481777 - #BM_Power_PCM16/16 1458.2711695604448 ns 1451.9060239613882 ns 481942 - #BM_Power_PCM16/17 1819.4835449076445 ns 1812.0266937315016 ns 386233 - #BM_Power_PCM16/18 1820.3459564206057 ns 1812.1602221110884 ns 385933 - #BM_Power_PCM16/19 2181.369784896039 ns 2171.339075235256 ns 322309 - #BM_Power_PCM16/20 2905.2177625941545 ns 2891.0355549312485 ns 242048 - #BM_Power_PCM16/21 2364.132933388578 ns 2353.0530248033974 ns 297540 - #BM_Power_PCM16/22 4349.369949027189 ns 4330.079046346484 ns 161652 - #BM_Power_PCM24/0 726.8133229852367 ns 723.3602813367419 ns 967666 - #BM_Power_PCM24/1 726.8365225340423 ns 723.350181307502 ns 967693 - #BM_Power_PCM24/2 1453.469049006606 ns 1446.6400141353229 ns 483894 - #BM_Power_PCM24/3 2180.028380578979 ns 2169.873979821151 ns 322615 - #BM_Power_PCM24/4 2906.8811984734084 ns 2892.9251947515163 ns 241975 - #BM_Power_PCM24/5 2905.392592651063 ns 2892.431327900298 ns 241976 - #BM_Power_PCM24/6 2906.3533070213225 ns 2892.6723102671663 ns 241985 - #BM_Power_PCM24/7 2906.669533308557 ns 2892.9162117373876 ns 241979 - #BM_Power_PCM24/8 3634.8299686929113 ns 3616.1580312219085 ns 193582 - #BM_Power_PCM24/9 3646.2416687724785 ns 3615.1269559198017 ns 193579 - #BM_Power_PCM24/10 3641.1275370266394 ns 3615.927121699737 ns 193583 - #BM_Power_PCM24/11 4355.066508388703 ns 4339.191726899678 ns 161318 - #BM_Power_PCM24/12 4360.341786985582 ns 4339.590976436824 ns 161311 - #BM_Power_PCM24/13 4358.200731467333 ns 4339.247979171812 ns 161320 - #BM_Power_PCM24/14 5086.769189048403 ns 5062.498152152655 ns 138269 - #BM_Power_PCM24/15 5810.423653580803 ns 5785.767373623829 ns 120988 - #BM_Power_PCM24/16 5812.39702625609 ns 5786.230130917745 ns 120992 - #BM_Power_PCM24/17 7265.510668878706 ns 7232.779768427578 ns 96730 - #BM_Power_PCM24/18 7264.246771353663 ns 7231.77107139167 ns 96790 - #BM_Power_PCM24/19 8722.56894669041 ns 8684.567792613941 ns 80584 - #BM_Power_PCM24/20 11628.424309578584 ns 11575.683942450829 ns 60470 - #BM_Power_PCM24/21 9448.977902926888 ns 9406.63598973104 ns 74399 - #BM_Power_PCM24/22 17459.032419808307 ns 17375.671358355692 ns 40284 - #BM_Power_PCM32/0 180.64821432650652 ns 179.97205866590213 ns 3890437 - #BM_Power_PCM32/1 180.59328255286601 ns 180.00214854248506 ns 3890079 - #BM_Power_PCM32/2 367.3883165270478 ns 365.8843055170383 ns 1910990 - #BM_Power_PCM32/3 547.961739191163 ns 545.3393246259511 ns 1283585 - #BM_Power_PCM32/4 732.3382747450341 ns 729.2189395984298 ns 963220 - #BM_Power_PCM32/5 729.9914091395625 ns 726.6235749813377 ns 963233 - #BM_Power_PCM32/6 730.3738877114395 ns 726.9318824314772 ns 963217 - #BM_Power_PCM32/7 729.978907085062 ns 726.8274224511407 ns 963167 - #BM_Power_PCM32/8 912.4335730958729 ns 908.6304864721727 ns 770486 - #BM_Power_PCM32/9 915.197196607744 ns 910.899041307122 ns 770424 - #BM_Power_PCM32/10 912.5102563972879 ns 908.5750461421018 ns 770446 - #BM_Power_PCM32/11 1098.4029936865272 ns 1093.6704840140765 ns 641283 - #BM_Power_PCM32/12 1096.636110073461 ns 1091.8296782198036 ns 639971 - #BM_Power_PCM32/13 1096.700748278668 ns 1091.723232471321 ns 641206 - #BM_Power_PCM32/14 1278.092437845839 ns 1273.2958269366497 ns 549764 - #BM_Power_PCM32/15 1461.4543514169204 ns 1454.7416748932296 ns 481165 - #BM_Power_PCM32/16 1461.7273813451243 ns 1454.7411392391905 ns 481251 - #BM_Power_PCM32/17 1825.907444290439 ns 1817.8784066977225 ns 385087 - #BM_Power_PCM32/18 1825.381231595566 ns 1817.6661308667603 ns 385094 - #BM_Power_PCM32/19 2190.975242630068 ns 2180.597490904206 ns 320753 - #BM_Power_PCM32/20 2928.6714986526026 ns 2914.821292933981 ns 240136 - #BM_Power_PCM32/21 2372.874287158484 ns 2361.940219335247 ns 296350 - #BM_Power_PCM32/22 4388.368818947491 ns 4369.06687184351 ns 160187 - #BM_Power_FLOAT/0 168.00676835156023 ns 167.22008168082328 ns 4185805 - #BM_Power_FLOAT/1 167.9938065702452 ns 167.21758154288213 ns 4186049 - #BM_Power_FLOAT/2 351.86860213833324 ns 350.24728499757555 ns 1998617 - #BM_Power_FLOAT/3 532.7485100122135 ns 530.3216093916758 ns 1319977 - #BM_Power_FLOAT/4 716.865648210546 ns 713.9932490536078 ns 980307 - #BM_Power_FLOAT/5 717.0498470928635 ns 713.9746434393012 ns 980338 - #BM_Power_FLOAT/6 717.1775874821899 ns 713.9746211792758 ns 980345 - #BM_Power_FLOAT/7 717.286950836443 ns 713.9845488011172 ns 980377 - #BM_Power_FLOAT/8 899.506933295295 ns 895.4523624630964 ns 781663 - #BM_Power_FLOAT/9 899.6416845109553 ns 895.4465512345158 ns 781758 - #BM_Power_FLOAT/10 899.7137607146544 ns 895.4340975403337 ns 781769 - #BM_Power_FLOAT/11 1080.3481243192064 ns 1075.391753733447 ns 650937 - #BM_Power_FLOAT/12 1080.3754966280014 ns 1075.4162669575685 ns 650890 - #BM_Power_FLOAT/13 1080.4839543269304 ns 1075.418525654029 ns 650892 - #BM_Power_FLOAT/14 1260.9659337690678 ns 1255.3850934907016 ns 557649 - #BM_Power_FLOAT/15 1441.347092340657 ns 1435.2933780849712 ns 487729 - #BM_Power_FLOAT/16 1441.0252178558349 ns 1435.2747308851679 ns 487710 - #BM_Power_FLOAT/17 1803.174621574897 ns 1795.239832164671 ns 389906 - #BM_Power_FLOAT/18 1803.6167320222003 ns 1795.1778607571464 ns 389923 - #BM_Power_FLOAT/19 2164.5316780757366 ns 2155.090783692497 ns 324783 - #BM_Power_FLOAT/20 2895.0966589259547 ns 2881.551857432239 ns 242916 - #BM_Power_FLOAT/21 2346.6239754749067 ns 2335.133450977916 ns 299773 - #BM_Power_FLOAT/22 4336.788207490892 ns 4319.045067224524 ns 162069 + #BM_Power_PCM16/0 180.97891633448947 ns 180.1272276967831 ns 3885852 + #BM_Power_PCM16/1 180.947387663606 ns 180.12366729181554 ns 3886541 + #BM_Power_PCM16/2 366.788606340951 ns 365.0981633489325 ns 1915715 + #BM_Power_PCM16/3 548.7117372721175 ns 546.1077741464976 ns 1282070 + #BM_Power_PCM16/4 729.3091325787765 ns 725.8914895381871 ns 964460 + #BM_Power_PCM16/5 729.5107214636998 ns 725.9903921261499 ns 964001 + #BM_Power_PCM16/6 729.3142860987979 ns 725.7270789310516 ns 964462 + #BM_Power_PCM16/7 728.8285153846795 ns 725.6857445895254 ns 964518 + #BM_Power_PCM16/8 910.266543588675 ns 905.9784034899902 ns 773412 + #BM_Power_PCM16/9 909.7631943495237 ns 905.5980073450368 ns 773039 + #BM_Power_PCM16/10 909.4817518460887 ns 905.3046969675422 ns 773201 + #BM_Power_PCM16/11 1090.5752804035608 ns 1085.4067030965386 ns 645075 + #BM_Power_PCM16/12 1090.1430777061082 ns 1085.3985691918976 ns 645090 + #BM_Power_PCM16/13 1090.074475934731 ns 1085.1795632613716 ns 645054 + #BM_Power_PCM16/14 1271.0181027106237 ns 1265.0655742741403 ns 553342 + #BM_Power_PCM16/15 1450.921852464041 ns 1444.5727666390078 ns 484494 + #BM_Power_PCM16/16 1451.2240897596057 ns 1444.6554862570126 ns 484538 + #BM_Power_PCM16/17 1812.7695622285537 ns 1804.1137781191355 ns 387992 + #BM_Power_PCM16/18 1812.6019333363201 ns 1804.0487200830873 ns 388033 + #BM_Power_PCM16/19 2172.960068355248 ns 2163.5280680444976 ns 323553 + #BM_Power_PCM16/20 2896.932275617628 ns 2882.070808558297 ns 242852 + #BM_Power_PCM16/21 2354.0114820280833 ns 2343.273892215966 ns 298727 + #BM_Power_PCM16/22 4341.411563591838 ns 4320.3065557379705 ns 162026 + #BM_Power_PCM24/0 726.5703267242005 ns 723.3638876888978 ns 967598 + #BM_Power_PCM24/1 726.8478512196587 ns 723.3996910023831 ns 967645 + #BM_Power_PCM24/2 1453.522365501612 ns 1446.817841538075 ns 483826 + #BM_Power_PCM24/3 2180.4783136728597 ns 2170.2972985542474 ns 322531 + #BM_Power_PCM24/4 2907.3817241064085 ns 2893.78803763662 ns 241892 + #BM_Power_PCM24/5 2907.93437008585 ns 2893.9130060933658 ns 241902 + #BM_Power_PCM24/6 2907.476982094115 ns 2893.869563779754 ns 241942 + #BM_Power_PCM24/7 2906.2742418195558 ns 2893.721987961765 ns 241896 + #BM_Power_PCM24/8 3634.429566572946 ns 3617.2687167986146 ns 193516 + #BM_Power_PCM24/9 3637.612631382197 ns 3617.0655648749726 ns 193503 + #BM_Power_PCM24/10 3637.7966951345143 ns 3617.4579242709196 ns 193532 + #BM_Power_PCM24/11 4360.471518019849 ns 4340.702796725793 ns 161260 + #BM_Power_PCM24/12 4359.483557914242 ns 4340.688663859648 ns 161263 + #BM_Power_PCM24/13 4361.102585926522 ns 4340.699237256582 ns 161260 + #BM_Power_PCM24/14 5087.945822701728 ns 5064.266827288351 ns 138213 + #BM_Power_PCM24/15 5814.759977572986 ns 5787.690635285585 ns 120922 + #BM_Power_PCM24/16 5815.644749452265 ns 5787.2178931701865 ns 120940 + #BM_Power_PCM24/17 7266.470565121046 ns 7234.610670139329 ns 96756 + #BM_Power_PCM24/18 7268.525628728221 ns 7234.5110649425615 ns 96747 + #BM_Power_PCM24/19 8716.083166871316 ns 8682.350888726245 ns 80621 + #BM_Power_PCM24/20 11639.32870672943 ns 11580.26249875915 ns 60446 + #BM_Power_PCM24/21 9446.785352486686 ns 9405.332101734537 ns 74429 + #BM_Power_PCM24/22 17463.72223575373 ns 17383.809277838518 ns 40268 + #BM_Power_PCM32/0 179.84309567370832 ns 179.0676549126645 ns 3908541 + #BM_Power_PCM32/1 179.9404439204146 ns 179.0010033549121 ns 3911876 + #BM_Power_PCM32/2 367.16608437351766 ns 365.42729213737346 ns 1915123 + #BM_Power_PCM32/3 550.277695550067 ns 547.9387707497883 ns 1277543 + #BM_Power_PCM32/4 731.4224075405458 ns 728.3597142669796 ns 960757 + #BM_Power_PCM32/5 731.8896053180963 ns 728.7127664668686 ns 960626 + #BM_Power_PCM32/6 731.6139555435336 ns 728.404691132571 ns 960493 + #BM_Power_PCM32/7 731.4526007701458 ns 728.5303351299617 ns 952526 + #BM_Power_PCM32/8 914.9957172964164 ns 910.4922825617709 ns 768908 + #BM_Power_PCM32/9 914.9228864106034 ns 910.3200879900779 ns 768723 + #BM_Power_PCM32/10 914.4295313745796 ns 910.2645292119196 ns 768796 + #BM_Power_PCM32/11 1097.4819073401923 ns 1092.6099713796307 ns 640453 + #BM_Power_PCM32/12 1098.341821642604 ns 1093.1572358279782 ns 640293 + #BM_Power_PCM32/13 1098.2661898965437 ns 1092.878152356299 ns 636508 + #BM_Power_PCM32/14 1280.2780676971747 ns 1274.5237323564186 ns 549208 + #BM_Power_PCM32/15 1462.3197864285032 ns 1455.9130929404507 ns 480824 + #BM_Power_PCM32/16 1462.7091956393392 ns 1455.9862690566056 ns 480739 + #BM_Power_PCM32/17 1827.1057526954876 ns 1818.949706155908 ns 384898 + #BM_Power_PCM32/18 1827.7309048929412 ns 1819.2175362902751 ns 384745 + #BM_Power_PCM32/19 2191.6910336275487 ns 2182.0570090137303 ns 320844 + #BM_Power_PCM32/20 2932.1382076410487 ns 2917.764512203257 ns 239936 + #BM_Power_PCM32/21 2384.572833177335 ns 2374.048604969875 ns 296266 + #BM_Power_PCM32/22 4396.192134082954 ns 4376.258719439749 ns 159930 + #BM_Power_FLOAT/0 167.25444234000543 ns 166.47183513094348 ns 4204937 + #BM_Power_FLOAT/1 167.24129258362885 ns 166.4590919078933 ns 4205036 + #BM_Power_FLOAT/2 351.4320774910175 ns 349.8888578977785 ns 2000574 + #BM_Power_FLOAT/3 532.5129107539586 ns 530.0978074251175 ns 1320411 + #BM_Power_FLOAT/4 716.8967761364072 ns 713.6935400332438 ns 980810 + #BM_Power_FLOAT/5 716.7889045409055 ns 713.7320038176421 ns 980708 + #BM_Power_FLOAT/6 717.0752796480707 ns 713.6972366395144 ns 980690 + #BM_Power_FLOAT/7 716.6102092628516 ns 713.7024690577491 ns 980698 + #BM_Power_FLOAT/8 899.5296958205339 ns 894.9512572444829 ns 782147 + #BM_Power_FLOAT/9 899.2820069645962 ns 894.9592230725831 ns 782158 + #BM_Power_FLOAT/10 899.3434261268515 ns 894.9564490846954 ns 782142 + #BM_Power_FLOAT/11 1079.2320050706119 ns 1074.9236482969181 ns 651197 + #BM_Power_FLOAT/12 1079.995203673193 ns 1074.9423856936505 ns 651123 + #BM_Power_FLOAT/13 1080.2950176525237 ns 1074.9553632314376 ns 651100 + #BM_Power_FLOAT/14 1260.4052233815107 ns 1254.9116087466343 ns 557838 + #BM_Power_FLOAT/15 1441.558180305023 ns 1434.8599962279143 ns 487794 + #BM_Power_FLOAT/16 1442.0232631999454 ns 1434.885356768729 ns 487809 + #BM_Power_FLOAT/17 1802.938510952263 ns 1794.8471734733112 ns 390037 + #BM_Power_FLOAT/18 1802.3492608362737 ns 1794.8067419071836 ns 390038 + #BM_Power_FLOAT/19 2163.5362062414747 ns 2154.6858791842915 ns 324875 + #BM_Power_FLOAT/20 2894.779758388722 ns 2882.248193812651 ns 242915 + #BM_Power_FLOAT/21 2345.738662867257 ns 2334.692247141554 ns 299812 + #BM_Power_FLOAT/22 4341.005108230952 ns 4318.48899075213 ns 162091 -Pixel 7 (without NEON) +Pixel 7 (generic intrinsics) audio_power_benchmark: - #BM_Power_PCM16/0 710.0176225862541 ns 706.5369398066563 ns 990774 - #BM_Power_PCM16/1 709.5382576613781 ns 706.4966294743109 ns 990795 - #BM_Power_PCM16/2 1432.22333510522 ns 1425.8151113120955 ns 490917 - #BM_Power_PCM16/3 2154.9135527473695 ns 2144.662213834012 ns 326384 - #BM_Power_PCM16/4 2878.298555528755 ns 2864.2879572796446 ns 244380 - #BM_Power_PCM16/5 2876.5968518680247 ns 2864.1508803227484 ns 244399 - #BM_Power_PCM16/6 2876.786253481042 ns 2864.19396835658 ns 244411 - #BM_Power_PCM16/7 2877.9274599737573 ns 2864.401922234998 ns 244403 - #BM_Power_PCM16/8 3599.2718202583505 ns 3583.2360171183445 ns 195346 - #BM_Power_PCM16/9 3598.8521185858167 ns 3583.090189695244 ns 195366 - #BM_Power_PCM16/10 3607.3259044670376 ns 3583.0856094265077 ns 195364 - #BM_Power_PCM16/11 4322.178672831243 ns 4301.900640355943 ns 162722 - #BM_Power_PCM16/12 4322.413127245351 ns 4301.978619057853 ns 162715 - #BM_Power_PCM16/13 4321.293715661808 ns 4301.96961074716 ns 162722 - #BM_Power_PCM16/14 5045.423164118614 ns 5021.725069589923 ns 139388 - #BM_Power_PCM16/15 5771.999819383823 ns 5746.762121734573 ns 121806 - #BM_Power_PCM16/16 5772.041574304546 ns 5746.705014531301 ns 121806 - #BM_Power_PCM16/17 7215.511103357703 ns 7184.062689844849 ns 97448 - #BM_Power_PCM16/18 7218.193087948947 ns 7183.934522282532 ns 97453 - #BM_Power_PCM16/19 8664.970089320619 ns 8623.758558669555 ns 81175 - #BM_Power_PCM16/20 11552.535965953697 ns 11497.46184930272 ns 60877 - #BM_Power_PCM16/21 9388.44799135153 ns 9344.99919894262 ns 74901 - #BM_Power_PCM16/22 17318.294772901812 ns 17250.841042955355 ns 40577 - #BM_Power_PCM24/0 726.1379773475074 ns 722.8207160621033 ns 968659 - #BM_Power_PCM24/1 726.2071634525643 ns 722.8022114316173 ns 968332 - #BM_Power_PCM24/2 1453.0095285897585 ns 1446.1379218461464 ns 484122 - #BM_Power_PCM24/3 2178.8762811180095 ns 2169.3027238608242 ns 322667 - #BM_Power_PCM24/4 2906.0304321240224 ns 2892.507449982226 ns 242014 - #BM_Power_PCM24/5 2905.6813512412673 ns 2892.5476900826334 ns 242000 - #BM_Power_PCM24/6 2905.133137183084 ns 2892.546958357953 ns 241991 - #BM_Power_PCM24/7 2904.9541637755087 ns 2892.5035497041163 ns 241992 - #BM_Power_PCM24/8 3631.119642357674 ns 3615.6040123552484 ns 193602 - #BM_Power_PCM24/9 3630.0929723231857 ns 3615.540825181041 ns 193606 - #BM_Power_PCM24/10 3633.1367688699947 ns 3615.7981963182615 ns 193604 - #BM_Power_PCM24/11 4359.021829536821 ns 4338.925567586662 ns 161341 - #BM_Power_PCM24/12 4359.9166083010205 ns 4338.934589518691 ns 161335 - #BM_Power_PCM24/13 4359.522224219495 ns 4338.965623895894 ns 161333 - #BM_Power_PCM24/14 5084.233240033068 ns 5062.102226947575 ns 138261 - #BM_Power_PCM24/15 5812.489557247782 ns 5785.279710396485 ns 120993 - #BM_Power_PCM24/16 5813.233703335974 ns 5785.328950087162 ns 120991 - #BM_Power_PCM24/17 7263.762055413714 ns 7236.184253515309 ns 96720 - #BM_Power_PCM24/18 7268.666932026325 ns 7235.481695528568 ns 96725 - #BM_Power_PCM24/19 8724.851571115321 ns 8682.925320365706 ns 80611 - #BM_Power_PCM24/20 11627.03445538921 ns 11574.680163349089 ns 60484 - #BM_Power_PCM24/21 9446.979585525633 ns 9401.467625609412 ns 74457 - #BM_Power_PCM24/22 17453.76751577347 ns 17376.240230398802 ns 40278 - #BM_Power_PCM32/0 707.1022650419995 ns 704.1584563736747 ns 993712 - #BM_Power_PCM32/1 707.6617193193007 ns 704.0883302075107 ns 994405 - #BM_Power_PCM32/2 1429.6476727153045 ns 1423.189700138329 ns 491560 - #BM_Power_PCM32/3 2155.1067918033104 ns 2145.802709002737 ns 326467 - #BM_Power_PCM32/4 2879.5403983345095 ns 2866.2732763350723 ns 244218 - #BM_Power_PCM32/5 2879.848013559709 ns 2866.3107704151535 ns 244206 - #BM_Power_PCM32/6 2877.9152711001893 ns 2866.44009237345 ns 244226 - #BM_Power_PCM32/7 2879.7659499561555 ns 2866.283334084033 ns 244217 - #BM_Power_PCM32/8 3602.6533810677543 ns 3585.3524729960113 ns 195249 - #BM_Power_PCM32/9 3600.7437134450106 ns 3585.2641581906905 ns 195258 - #BM_Power_PCM32/10 3601.967841229724 ns 3585.171620998731 ns 195250 - #BM_Power_PCM32/11 4323.25945603472 ns 4304.413593570312 ns 162621 - #BM_Power_PCM32/12 4324.284355266607 ns 4304.535958725075 ns 162617 - #BM_Power_PCM32/13 4324.75273181278 ns 4304.458329490036 ns 162621 - #BM_Power_PCM32/14 5045.495830640872 ns 5023.208295658413 ns 139350 - #BM_Power_PCM32/15 5766.477377491339 ns 5741.618567186731 ns 121914 - #BM_Power_PCM32/16 5768.763821447558 ns 5742.042194369716 ns 121912 - #BM_Power_PCM32/17 7213.064382792687 ns 7179.561977537295 ns 97495 - #BM_Power_PCM32/18 7213.149411254583 ns 7179.568925904638 ns 97496 - #BM_Power_PCM32/19 8659.201942841833 ns 8618.064860073133 ns 81221 - #BM_Power_PCM32/20 11554.875168393679 ns 11501.10802418425 ns 60866 - #BM_Power_PCM32/21 9382.646321933518 ns 9338.843445662977 ns 74958 - #BM_Power_PCM32/22 17333.68259326782 ns 17248.919274555337 ns 40582 - #BM_Power_FLOAT/0 701.9890055797536 ns 698.8782105756134 ns 1001417 - #BM_Power_FLOAT/1 702.0201509805795 ns 698.8716249878825 ns 1001589 - #BM_Power_FLOAT/2 1424.347226948063 ns 1417.6723436056996 ns 493752 - #BM_Power_FLOAT/3 2146.7867642580077 ns 2136.673361251187 ns 327628 - #BM_Power_FLOAT/4 2871.9139060421503 ns 2858.4790952007725 ns 244872 - #BM_Power_FLOAT/5 2871.9435091015116 ns 2858.480589164847 ns 244889 - #BM_Power_FLOAT/6 2871.525335064572 ns 2858.4684700136518 ns 244878 - #BM_Power_FLOAT/7 2872.250985165238 ns 2858.5650821004274 ns 244883 - #BM_Power_FLOAT/8 3594.7066331346323 ns 3577.8518291342452 ns 195639 - #BM_Power_FLOAT/9 3593.90243503874 ns 3578.0740988734437 ns 195644 - #BM_Power_FLOAT/10 3594.786827430224 ns 3577.8794729336846 ns 195649 - #BM_Power_FLOAT/11 4316.386219808971 ns 4296.510088082756 ns 162915 - #BM_Power_FLOAT/12 4315.539684876495 ns 4296.500868519076 ns 162921 - #BM_Power_FLOAT/13 4316.70090107445 ns 4296.524116722725 ns 162916 - #BM_Power_FLOAT/14 5038.922920398046 ns 5015.71531847824 ns 139570 - #BM_Power_FLOAT/15 5760.129063966319 ns 5734.677856142363 ns 122079 - #BM_Power_FLOAT/16 5761.676214194108 ns 5734.530059972358 ns 122056 - #BM_Power_FLOAT/17 7205.371829030732 ns 7172.0947809516465 ns 97604 - #BM_Power_FLOAT/18 7204.279098831734 ns 7172.448840759307 ns 97607 - #BM_Power_FLOAT/19 8648.461918826724 ns 8610.035399754119 ns 81300 - #BM_Power_FLOAT/20 11543.84161358285 ns 11492.703027516904 ns 60908 - #BM_Power_FLOAT/21 9375.01953072149 ns 9331.312011731832 ns 75010 - #BM_Power_FLOAT/22 17322.77400363915 ns 17240.430710872355 ns 40598 + #BM_Power_PCM16/0 337.760142649551 ns 336.25662704492527 ns 2081433 + #BM_Power_PCM16/1 337.9351210323131 ns 336.3553796166739 ns 2081152 + #BM_Power_PCM16/2 672.5364912371965 ns 669.4070783764836 ns 1045607 + #BM_Power_PCM16/3 1006.4414502095067 ns 1001.6641972800023 ns 698824 + #BM_Power_PCM16/4 1339.75865716885 ns 1333.3746327354206 ns 524826 + #BM_Power_PCM16/5 1339.747932594017 ns 1333.3662572435896 ns 525292 + #BM_Power_PCM16/6 1339.081792726286 ns 1333.2895640083038 ns 525010 + #BM_Power_PCM16/7 1338.8341052706744 ns 1333.1123506021647 ns 524857 + #BM_Power_PCM16/8 1674.8266277119628 ns 1667.4998296698461 ns 419773 + #BM_Power_PCM16/9 1675.1191347994438 ns 1667.2425555624354 ns 419978 + #BM_Power_PCM16/10 1674.71266726399 ns 1666.9600314300715 ns 419980 + #BM_Power_PCM16/11 2010.2484537350663 ns 2000.7651024934537 ns 349876 + #BM_Power_PCM16/12 2009.7556108023364 ns 2000.7828645744019 ns 349860 + #BM_Power_PCM16/13 2010.480003104438 ns 2000.914753011257 ns 349854 + #BM_Power_PCM16/14 2344.500011676101 ns 2333.4616815513364 ns 299973 + #BM_Power_PCM16/15 2679.159956909422 ns 2667.7558284453257 ns 262377 + #BM_Power_PCM16/16 2680.335192003616 ns 2667.7298096639197 ns 262378 + #BM_Power_PCM16/17 3349.3067194938394 ns 3333.070394636596 ns 210016 + #BM_Power_PCM16/18 3347.5797958892135 ns 3332.993186265871 ns 210017 + #BM_Power_PCM16/19 4017.7712421677643 ns 4000.1111676171067 ns 175006 + #BM_Power_PCM16/20 5353.909304200122 ns 5332.241398226324 ns 131252 + #BM_Power_PCM16/21 4351.829067676431 ns 4333.01033665303 ns 161561 + #BM_Power_PCM16/22 8033.3449012838255 ns 7998.6481625377255 ns 87512 + #BM_Power_PCM24/0 726.0724740108452 ns 722.793010652718 ns 968016 + #BM_Power_PCM24/1 726.5141612892924 ns 723.0428633830344 ns 968379 + #BM_Power_PCM24/2 1453.0771079215267 ns 1446.3117483448088 ns 483932 + #BM_Power_PCM24/3 2179.805517686665 ns 2169.532832665915 ns 322636 + #BM_Power_PCM24/4 2904.8858799182244 ns 2892.571683017743 ns 241982 + #BM_Power_PCM24/5 2905.290520956667 ns 2892.6407436793943 ns 241986 + #BM_Power_PCM24/6 2905.7997231004174 ns 2892.3390330578545 ns 242000 + #BM_Power_PCM24/7 2906.2596490773803 ns 2892.7177675295025 ns 241992 + #BM_Power_PCM24/8 3632.076809461512 ns 3615.545919869441 ns 193609 + #BM_Power_PCM24/9 3632.5518469824924 ns 3615.6390283459427 ns 193608 + #BM_Power_PCM24/10 3631.5230189943823 ns 3615.826974034231 ns 193601 + #BM_Power_PCM24/11 4358.713629609595 ns 4338.867286915848 ns 161333 + #BM_Power_PCM24/12 4359.231924050327 ns 4338.824437653807 ns 161333 + #BM_Power_PCM24/13 4356.325355513422 ns 4339.28872069524 ns 161322 + #BM_Power_PCM24/14 5086.698249103073 ns 5062.681608123064 ns 138273 + #BM_Power_PCM24/15 5812.5060886912215 ns 5785.645307165088 ns 120961 + #BM_Power_PCM24/16 5812.5832141796 ns 5785.508699569401 ns 120983 + #BM_Power_PCM24/17 7263.989452119297 ns 7231.864374560889 ns 96796 + #BM_Power_PCM24/18 7263.645282618586 ns 7231.993934070503 ns 96770 + #BM_Power_PCM24/19 8717.248425410799 ns 8678.296803709562 ns 80656 + #BM_Power_PCM24/20 11627.892071566856 ns 11574.479277610539 ns 60466 + #BM_Power_PCM24/21 9446.526244343248 ns 9406.349543664368 ns 74397 + #BM_Power_PCM24/22 17447.99081596151 ns 17374.784496239492 ns 40287 + #BM_Power_PCM32/0 366.5107706071915 ns 364.8308462714918 ns 1919065 + #BM_Power_PCM32/1 366.5168704771794 ns 364.7554321738571 ns 1918707 + #BM_Power_PCM32/2 730.2924269058933 ns 727.1057881827118 ns 962962 + #BM_Power_PCM32/3 1093.9912603896798 ns 1088.7929435922993 ns 642820 + #BM_Power_PCM32/4 1456.8093080845226 ns 1449.9832478951175 ns 482805 + #BM_Power_PCM32/5 1456.1232191254142 ns 1449.855881567137 ns 482839 + #BM_Power_PCM32/6 1456.2008645317271 ns 1449.290242604125 ns 482803 + #BM_Power_PCM32/7 1457.0090571482128 ns 1450.1991782065193 ns 482603 + #BM_Power_PCM32/8 1819.8793533353248 ns 1811.1948393741498 ns 386426 + #BM_Power_PCM32/9 1819.2882280461354 ns 1811.0842554160467 ns 386444 + #BM_Power_PCM32/10 1822.4888939503587 ns 1811.2359906431732 ns 386456 + #BM_Power_PCM32/11 2182.958268570418 ns 2172.377140481356 ns 322299 + #BM_Power_PCM32/12 2181.462500754493 ns 2171.936179393034 ns 322153 + #BM_Power_PCM32/13 2181.9423435499857 ns 2172.6346584016715 ns 322323 + #BM_Power_PCM32/14 2545.0172319340754 ns 2533.932252353998 ns 276231 + #BM_Power_PCM32/15 2910.0457980163624 ns 2897.3537004237564 ns 241648 + #BM_Power_PCM32/16 2911.5361569110396 ns 2897.792890320363 ns 241558 + #BM_Power_PCM32/17 3627.9648736853173 ns 3610.617067822403 ns 193815 + #BM_Power_PCM32/18 3626.573311653762 ns 3610.4587441726208 ns 193912 + #BM_Power_PCM32/19 4371.516904489751 ns 4351.228854700868 ns 160875 + #BM_Power_PCM32/20 5833.295410225075 ns 5806.0874899420005 ns 120551 + #BM_Power_PCM32/21 4733.5978237221925 ns 4713.533180254132 ns 148507 + #BM_Power_PCM32/22 8761.069766861208 ns 8719.434463509126 ns 80253 + #BM_Power_FLOAT/0 309.7573236829997 ns 308.3100559643444 ns 2270374 + #BM_Power_FLOAT/1 309.82671751979507 ns 308.31727146129305 ns 2269889 + #BM_Power_FLOAT/2 616.5533244216624 ns 613.6735468134156 ns 1140528 + #BM_Power_FLOAT/3 924.1979909165235 ns 919.7778739898862 ns 761050 + #BM_Power_FLOAT/4 1229.228159640607 ns 1223.550260462179 ns 572060 + #BM_Power_FLOAT/5 1229.1571063156637 ns 1223.5816435152462 ns 572103 + #BM_Power_FLOAT/6 1229.316886623843 ns 1223.9691527852942 ns 572110 + #BM_Power_FLOAT/7 1230.6891998748167 ns 1223.8458668046858 ns 571882 + #BM_Power_FLOAT/8 1537.1050916942645 ns 1530.2548960451468 ns 457410 + #BM_Power_FLOAT/9 1537.624473698849 ns 1530.2998740768824 ns 457422 + #BM_Power_FLOAT/10 1537.8752661993187 ns 1530.7318127550295 ns 457326 + #BM_Power_FLOAT/11 1839.3751016921826 ns 1830.8065497602443 ns 382243 + #BM_Power_FLOAT/12 1840.0720637536492 ns 1831.6316314246699 ns 382120 + #BM_Power_FLOAT/13 1839.6525013051378 ns 1831.4300958752372 ns 382059 + #BM_Power_FLOAT/14 2148.22182778337 ns 2138.606070233867 ns 327335 + #BM_Power_FLOAT/15 2452.349344804406 ns 2441.433373571317 ns 286628 + #BM_Power_FLOAT/16 2452.055538106065 ns 2441.4724420438542 ns 286596 + #BM_Power_FLOAT/17 3069.5813045295163 ns 3056.235010106019 ns 229071 + #BM_Power_FLOAT/18 3068.6837668400217 ns 3055.440662658508 ns 229198 + #BM_Power_FLOAT/19 3672.409374171318 ns 3656.6498973574044 ns 191441 + #BM_Power_FLOAT/20 4929.296286126466 ns 4905.616779854631 ns 142683 + #BM_Power_FLOAT/21 4001.2961863843757 ns 3975.4695817490483 ns 176210 + #BM_Power_FLOAT/22 7390.10927792279 ns 7357.815966854894 ns 95097 */ diff --git a/audio_utils/include/audio_utils/intrinsic_utils.h b/audio_utils/include/audio_utils/intrinsic_utils.h index 0c333e0e..beedd681 100644 --- a/audio_utils/include/audio_utils/intrinsic_utils.h +++ b/audio_utils/include/audio_utils/intrinsic_utils.h @@ -49,8 +49,54 @@ inline constexpr bool dependent_false_v = false; template<typename T, size_t N> struct internal_array_t { T v[N]; + static constexpr size_t size() { return N; } }; +// Detect if the value is directly addressable as an array. +// This is more advanced than std::is_array and works with neon intrinsics. +template<typename T> +concept is_array_like = requires(T a) { + a[0]; // can index first element +}; + +// Vector convert between type T to type S. +template <typename S, typename T> +inline S vconvert(const T& in) { + S out; + + if constexpr (is_array_like<S>) { + if constexpr (is_array_like<T>) { +#pragma unroll + // neon intrinsics need sizeof. + for (size_t i = 0; i < sizeof(in) / sizeof(in[0]); ++i) { + out[i] = in[i]; + } + } else { /* constexpr */ + const auto& [inv] = in; +#pragma unroll + for (size_t i = 0; i < T::size(); ++i) { + out[i] = inv[i]; + } + } + } else { /* constexpr */ + auto& [outv] = out; + if constexpr (is_array_like<T>) { +#pragma unroll + // neon intrinsics need sizeof. + for (size_t i = 0; i < sizeof(in) / sizeof(in[0]); ++i) { + outv[i] = in[i]; + } + } else { /* constexpr */ + const auto& [inv] = in; +#pragma unroll + for (size_t i = 0; i < T::size(); ++i) { + outv[i] = inv[i]; + } + } + } + return out; +} + /* Generalized template functions for the Neon instruction set. @@ -117,6 +163,46 @@ static inline T vadd(T a, T b) { } } +// add internally +template<typename T> +inline auto vaddv(const T& a) { + if constexpr (std::is_same_v<T, float> || std::is_same_v<T, double>) { + return a; + +#ifdef USE_NEON + } else if constexpr (std::is_same_v<T, float32x2_t>) { + return vaddv_f32(a); +#if defined(__aarch64__) + } else if constexpr (std::is_same_v<T, float32x4_t>) { + return vaddvq_f32(a); + } else if constexpr (std::is_same_v<T, float64x2_t>) { + return vaddvq_f64(a); +#endif +#endif // USE_NEON + } else if constexpr (is_array_like<T>) { + using ret_t = std::decay_t<decltype(a[0])>; + + ret_t ret{}; + // array_like is not the same as an array, so we use sizeof here + // to handle neon instrinsics. +#pragma unroll + for (size_t i = 0; i < sizeof(a) / sizeof(a[0]); ++i) { + ret += a[i]; + } + return ret; + } else /* constexpr */ { + const auto &[aval] = a; + using ret_t = std::decay_t<decltype(aval[0])>; + ret_t ret{}; + +#pragma unroll + for (size_t i = 0; i < std::size(aval); ++i) { + ret += aval[i]; + } + return ret; + } +} + // duplicate float into all elements. template<typename T, typename F> static inline T vdupn(F f) { @@ -264,7 +350,7 @@ static inline T vmla(T a, F b, T c) { // fused multiply-add a + b * c template<typename T> -static inline T vmla(T a, T b, T c) { +inline T vmla(const T& a, const T& b, const T& c) { if constexpr (std::is_same_v<T, float> || std::is_same_v<T, double>) { return a + b * c; diff --git a/audio_utils/power.cpp b/audio_utils/power.cpp index 58fa917f..699358f0 100644 --- a/audio_utils/power.cpp +++ b/audio_utils/power.cpp @@ -18,14 +18,12 @@ #define LOG_TAG "audio_utils_power" #include <log/log.h> -#include <algorithm> -#include <math.h> - #include <audio_utils/power.h> + +#include <audio_utils/intrinsic_utils.h> #include <audio_utils/primitives.h> #if defined(__aarch64__) || defined(__ARM_NEON__) -#include <arm_neon.h> #define USE_NEON #endif @@ -154,44 +152,31 @@ inline void energy(const void *amplitudes, size_t size, size_t numChannels, floa energyRef<FORMAT>(amplitudes, size, numChannels, out); } -// fast float power computation for ARM processors that support NEON. +// TODO(b/323611666) in some cases having a large kVectorWidth generic internal array is +// faster than the NEON intrinsic version. Optimize this. #ifdef USE_NEON +// The type conversion appears faster if we use a neon accumulator type. +constexpr size_t kVectorWidth = 4; +using AccumulatorType = float32x4_t; +#else +constexpr size_t kVectorWidth = 4; +using AccumulatorType = android::audio_utils::intrinsics::internal_array_t<float, kVectorWidth>; +#endif -template <typename T> -float32x4_t convertToFloatVectorAmplitude(T vamplitude) = delete; - -template <> -float32x4_t convertToFloatVectorAmplitude<float32x4_t>(float32x4_t vamplitude) { - return vamplitude; -} - -template <> -float32x4_t convertToFloatVectorAmplitude<int16x4_t>(int16x4_t vamplitude) { - const int32x4_t iamplitude = vmovl_s16(vamplitude); // expand s16 to s32 first - return vcvtq_f32_s32(iamplitude); -} - -template <> -float32x4_t convertToFloatVectorAmplitude<int32x4_t>(int32x4_t vamplitude) { - return vcvtq_f32_s32(vamplitude); -} - -template <typename Vector, typename Scalar> +template <typename Scalar, size_t N> inline float energyMonoVector(const void *amplitudes, size_t size) -{ - static_assert(sizeof(Vector) % sizeof(Scalar) == 0, - "Vector size must be a multiple of scalar size"); - const size_t vectorLength = sizeof(Vector) / sizeof(Scalar); // typically 4 (a const) - - // check pointer validity, must be aligned with scalar type. +{ // check pointer validity, must be aligned with scalar type. const Scalar *samplitudes = reinterpret_cast<const Scalar *>(amplitudes); LOG_ALWAYS_FATAL_IF((uintptr_t)samplitudes % alignof(Scalar) != 0, "Non-element aligned address: %p %zu", samplitudes, alignof(Scalar)); float accumulator = 0; + // seems that loading input data is fine using our generic intrinsic. + using Vector = android::audio_utils::intrinsics::internal_array_t<Scalar, N>; + // handle pointer unaligned to vector type. - while ((uintptr_t)samplitudes % alignof(Vector) != 0 /* compiler optimized */ && size > 0) { + while ((uintptr_t)samplitudes % sizeof(Vector) != 0 /* compiler optimized */ && size > 0) { const float amp = (float)*samplitudes++; accumulator += amp * amp; --size; @@ -201,21 +186,18 @@ inline float energyMonoVector(const void *amplitudes, size_t size) const Vector *vamplitudes = reinterpret_cast<const Vector *>(samplitudes); // clear vector accumulator - float32x4_t accum = vdupq_n_f32(0); + AccumulatorType accum{}; // iterate over array getting sum of squares in vectorLength lanes. size_t i; - for (i = 0; i < size - size % vectorLength /* compiler optimized */; i += vectorLength) { - const float32x4_t famplitude = convertToFloatVectorAmplitude(*vamplitudes++); - accum = vmlaq_f32(accum, famplitude, famplitude); + const size_t limit = size - size % N; + for (i = 0; i < limit; i += N) { + const auto famplitude = vconvert<AccumulatorType>(*vamplitudes++); + accum = android::audio_utils::intrinsics::vmla(accum, famplitude, famplitude); } - // narrow vectorLength lanes of floats - float32x2_t accum2 = vadd_f32(vget_low_f32(accum), vget_high_f32(accum)); // get stereo volume - accum2 = vpadd_f32(accum2, accum2); // combine to mono - - // accumulate vector - accumulator += vget_lane_f32(accum2, 0); + // add all components of the vector. + accumulator += android::audio_utils::intrinsics::vaddv(accum); // accumulate any trailing elements too small for vector size for (; i < size; ++i) { @@ -228,13 +210,13 @@ inline float energyMonoVector(const void *amplitudes, size_t size) template <> inline float energyMono<AUDIO_FORMAT_PCM_FLOAT>(const void *amplitudes, size_t size) { - return energyMonoVector<float32x4_t, float>(amplitudes, size); + return energyMonoVector<float, kVectorWidth>(amplitudes, size); } template <> inline float energyMono<AUDIO_FORMAT_PCM_16_BIT>(const void *amplitudes, size_t size) { - return energyMonoVector<int16x4_t, int16_t>(amplitudes, size) + return energyMonoVector<int16_t, kVectorWidth>(amplitudes, size) * normalizeEnergy<AUDIO_FORMAT_PCM_16_BIT>(); } @@ -242,7 +224,7 @@ inline float energyMono<AUDIO_FORMAT_PCM_16_BIT>(const void *amplitudes, size_t template <> inline float energyMono<AUDIO_FORMAT_PCM_32_BIT>(const void *amplitudes, size_t size) { - return energyMonoVector<int32x4_t, int32_t>(amplitudes, size) + return energyMonoVector<int32_t, kVectorWidth>(amplitudes, size) * normalizeEnergy<AUDIO_FORMAT_PCM_32_BIT>(); } @@ -250,12 +232,10 @@ inline float energyMono<AUDIO_FORMAT_PCM_32_BIT>(const void *amplitudes, size_t template <> inline float energyMono<AUDIO_FORMAT_PCM_8_24_BIT>(const void *amplitudes, size_t size) { - return energyMonoVector<int32x4_t, int32_t>(amplitudes, size) + return energyMonoVector<int32_t, kVectorWidth>(amplitudes, size) * normalizeEnergy<AUDIO_FORMAT_PCM_8_24_BIT>(); } -#endif // USE_NEON - } // namespace float audio_utils_compute_energy_mono(const void *buffer, audio_format_t format, size_t samples) |