mirror of
https://github.com/minio/minio.git
synced 2025-01-25 21:53:16 -05:00
Initial commit
This commit is contained in:
commit
397b887a87
202
LICENSE
Normal file
202
LICENSE
Normal file
@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
11
NOTICE
Normal file
11
NOTICE
Normal file
@ -0,0 +1,11 @@
|
||||
Mini Object Storage
|
||||
Copyright 2014 Minios, Inc.
|
||||
|
||||
This product includes software developed at Minios, Inc.
|
||||
(http://minios.io/).
|
||||
|
||||
The Minios project contains unmodified subcomponents under the contrib
|
||||
folder with separate copyright notices and license terms. Your use of
|
||||
the source code for these subcomponents is subject to the terms
|
||||
and conditions of the following licenses.
|
||||
|
51
docs/git/workflow.md
Normal file
51
docs/git/workflow.md
Normal file
@ -0,0 +1,51 @@
|
||||
Git Workflow
|
||||
============
|
||||
|
||||
Update local repo with latest changes from upstream
|
||||
```sh
|
||||
git fetch
|
||||
```
|
||||
|
||||
Create a new branch from the latest code
|
||||
```sh
|
||||
git checkout origin/master
|
||||
git checkout -b new_feature_branch
|
||||
```
|
||||
|
||||
```sh
|
||||
# do work here
|
||||
```
|
||||
|
||||
Create commit for submission
|
||||
```sh
|
||||
git commit -m "My Commit Message"
|
||||
```
|
||||
|
||||
Prepare commit for inclusion
|
||||
```sh
|
||||
git fetch
|
||||
git rebase origin/master
|
||||
```
|
||||
|
||||
Assuming no conflict, push to your personal fork.
|
||||
|
||||
```sh
|
||||
git push myrepo new_feature_branch:new_feature_branch
|
||||
# Visit https://github.com/minios/minios and create a new pull request
|
||||
# from your branch.
|
||||
```
|
||||
|
||||
Useful Tools
|
||||
------------
|
||||
As an alternative to manually pushing and creating a branch, use the `pulls send` command from github.com/docker/gordon:
|
||||
|
||||
Create a new pull request.
|
||||
```sh
|
||||
pulls send
|
||||
# automatically performs git push and creates pull request
|
||||
```
|
||||
|
||||
Update an existing pull request (e.g. PR 42)
|
||||
```sh
|
||||
pulls send 42
|
||||
```
|
4
docs/internal/DESIGN
Normal file
4
docs/internal/DESIGN
Normal file
@ -0,0 +1,4 @@
|
||||
- Erasure code - http://bit.ly/1yqxkUt (intel isal library)
|
||||
- Future erasure codes to look at - Fountain Codes, Simple XOR techniques, Custom
|
||||
- Bootstrap techniques - no downtime scenario
|
||||
- Simple APIs
|
9
docs/internal/INFRA
Normal file
9
docs/internal/INFRA
Normal file
@ -0,0 +1,9 @@
|
||||
- Google groups development (minios-dev)
|
||||
- Github groups (https://github.com/minios)
|
||||
- IRC users #minios
|
||||
- Support (zendesk.com)
|
||||
- Community Q/A (stackoverflow.com)
|
||||
- https://github.com/justinwalsh/daux.io (Document generator) or 'metalsmith.io'
|
||||
- Continuous integration - (https://drone.io/, https://codeship.io,
|
||||
http://wercker.com/, https://coveralls.io/)
|
||||
- Web UI - polymer project
|
10
docs/metalsmith.json
Normal file
10
docs/metalsmith.json
Normal file
@ -0,0 +1,10 @@
|
||||
{
|
||||
"source": "src",
|
||||
"destination": "build",
|
||||
"plugins": {
|
||||
"metalsmith-drafts": true,
|
||||
"metalsmith-markdown": true,
|
||||
"metalsmith-permalinks": "posts/:title",
|
||||
"metalsmith-templates": "handlebars"
|
||||
}
|
||||
}
|
3
erasure/.gitignore
vendored
Normal file
3
erasure/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
202
erasure/LICENSE
Normal file
202
erasure/LICENSE
Normal file
@ -0,0 +1,202 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
26
erasure/LICENSE.INTEL
Normal file
26
erasure/LICENSE.INTEL
Normal file
@ -0,0 +1,26 @@
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
42
erasure/Makefile
Normal file
42
erasure/Makefile
Normal file
@ -0,0 +1,42 @@
|
||||
########################################################################
|
||||
# Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
########################################################################
|
||||
|
||||
|
||||
units = src
|
||||
|
||||
default: slib
|
||||
|
||||
include $(foreach unit,$(units), $(unit)/Makefile)
|
||||
|
||||
# Override individual lib names to make one inclusive library.
|
||||
lib_name := isa-l.a
|
||||
|
||||
include make.inc
|
||||
|
||||
VPATH = $(units) include
|
78
erasure/Makefile.nmake
Normal file
78
erasure/Makefile.nmake
Normal file
@ -0,0 +1,78 @@
|
||||
########################################################################
|
||||
# Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
########################################################################
|
||||
|
||||
objs = src\ec-base.obj src\ec-highlevel-func.obj src\ec-multisrcary.obj src\gf-2vect-dot-prod-avx.obj src\gf-2vect-dot-prod-avx2.obj src\gf-2vect-dot-prod-sse.obj src\gf-3vect-dot-prod-avx.obj src\gf-3vect-dot-prod-avx2.obj src\gf-3vect-dot-prod-sse.obj src\gf-4vect-dot-prod-avx.obj src\gf-4vect-dot-prod-avx2.obj src\gf-4vect-dot-prod-sse.obj src\gf-5vect-dot-prod-avx.obj src\gf-5vect-dot-prod-avx2.obj src\gf-5vect-dot-prod-sse.obj src\gf-6vect-dot-prod-avx.obj src\gf-6vect-dot-prod-avx2.obj src\gf-6vect-dot-prod-sse.obj src\gf-vect-dot-prod-avx.obj src\gf-vect-dot-prod-avx2.obj src\gf-vect-dot-prod-sse.obj src\gf-vect-mul-avx.obj src\gf-vect-mul-sse.obj
|
||||
|
||||
libpath = c:\openssl\lib #set to ossl path for tests
|
||||
lisrcc = c:\openssl\include
|
||||
zlibpath = c:\zlib\lib
|
||||
zlisrcc = c:\zlib\include
|
||||
INCLUDES = -Isrc -Iinclude -I$(lisrcc) -I$(zlisrcc)
|
||||
# Compiler flags. Macro names passed with -D must be valid C identifiers, so
# the zlib and math-constant macros are spelled with underscores, not hyphens.
CFLAGS = -O2 -D ZLIB_WINAPI -D NDEBUG /nologo -D_USE_MATH_DEFINES -Qstd=c99 $(INCLUDES) $(D)
|
||||
AFLAGS = -f win64 $(INCLUDES) $(D)
|
||||
CC = icl
|
||||
AS = yasm
|
||||
|
||||
lib: src isa-l.lib
|
||||
|
||||
src: ; -mkdir $@
|
||||
|
||||
isa-l.lib: $(objs)
|
||||
lib -out:$@ $?
|
||||
|
||||
{erasure-code}.c.obj:
|
||||
$(CC) $(CFLAGS) /c -Fo$@ $?
|
||||
{erasure-code}.asm.obj:
|
||||
$(AS) $(AFLAGS) -o $@ $?
|
||||
|
||||
|
||||
|
||||
.obj.exe:
|
||||
link /out:$@ /nologo /libpath:$(libpath) /libpath:$(zlibpath) isa-l.lib $?
|
||||
|
||||
# Unit tests
|
||||
tests = erasure-code-base-test.exe erasure-code-sse-test.exe erasure-code-test.exe gf-2vect-dot-prod-sse-test.exe gf-3vect-dot-prod-sse-test.exe gf-4vect-dot-prod-sse-test.exe gf-5vect-dot-prod-sse-test.exe gf-6vect-dot-prod-sse-test.exe gf-inverse-test.exe gf-vect-dot-prod-avx-test.exe gf-vect-dot-prod-base-test.exe gf-vect-dot-prod-sse-test.exe gf-vect-dot-prod-test.exe gf-vect-mul-avx-test.exe gf-vect-mul-base-test.exe gf-vect-mul-sse-test.exe gf-vect-mul-test.exe
|
||||
|
||||
tests: lib $(tests)
|
||||
$(tests): $(@B).obj
|
||||
|
||||
# Performance tests
|
||||
perfs = erasure-code-base-perf.exe erasure-code-perf.exe erasure-code-sse-perf.exe gf-2vect-dot-prod-sse-perf.exe gf-3vect-dot-prod-sse-perf.exe gf-4vect-dot-prod-sse-perf.exe gf-5vect-dot-prod-sse-perf.exe gf-6vect-dot-prod-sse-perf.exe gf-vect-dot-prod-1tbl.exe gf-vect-dot-prod-avx-perf.exe gf-vect-dot-prod-perf.exe gf-vect-dot-prod-sse-perf.exe gf-vect-mul-avx-perf.exe gf-vect-mul-perf.exe gf-vect-mul-sse-perf.exe
|
||||
|
||||
perfs: lib $(perfs)
|
||||
$(perfs): $(@B).obj
|
||||
|
||||
clean:
|
||||
-if exist *.obj del *.obj
|
||||
-if exist src\*.obj del src\*.obj
|
||||
-if exist *.exe del *.exe
|
||||
-if exist isa-l.lib del isa-l.lib
|
||||
|
||||
zlib.lib:
|
||||
libeay32.lib:
|
11
erasure/README.md
Normal file
11
erasure/README.md
Normal file
@ -0,0 +1,11 @@
|
||||
- Install 'build-essential'
|
||||
|
||||
~~~
|
||||
# apt-get install build-essential -y
|
||||
~~~
|
||||
|
||||
- Install 'yasm'
|
||||
|
||||
~~~
|
||||
# apt-get install yasm -y
|
||||
~~~
|
49
erasure/RELEASE-NOTES.INTEL
Normal file
49
erasure/RELEASE-NOTES.INTEL
Normal file
@ -0,0 +1,49 @@
|
||||
================================================================================
|
||||
v2.10 Intel Intelligent Storage Acceleration Library Release Notes
|
||||
Open Source Version
|
||||
================================================================================
|
||||
|
||||
================================================================================
|
||||
RELEASE NOTE CONTENTS
|
||||
================================================================================
|
||||
1. KNOWN ISSUES
|
||||
2. FIXED ISSUES
|
||||
3. CHANGE LOG & FEATURES ADDED
|
||||
|
||||
================================================================================
|
||||
1. KNOWN ISSUES
|
||||
================================================================================
|
||||
|
||||
* Only the erasure code unit is included in the open source version at this time.
|
||||
|
||||
* Perf tests do not run in Windows environment.
|
||||
|
||||
* Leaving <unit>/bin directories from builds in unit directories will cause the
|
||||
top-level make build to fail. Build only in top-level or ensure unit
|
||||
directories are clean of objects and /bin.
|
||||
|
||||
* 32-bit lib is not supported in Windows.
|
||||
|
||||
================================================================================
|
||||
2. FIXED ISSUES
|
||||
================================================================================
|
||||
v2.10
|
||||
|
||||
* Fix for Windows register save overlap in gf_{3-6}vect_dot_prod_sse.asm. Only
|
||||
affects Windows versions of erasure code. GP register saves/restores were
|
||||
pushed to same stack area as XMM.
|
||||
|
||||
================================================================================
|
||||
3. CHANGE LOG & FEATURES ADDED
|
||||
================================================================================
|
||||
v2.10
|
||||
|
||||
* Erasure code updates
|
||||
- New AVX and AVX2 support functions.
|
||||
- Changes min len requirement on gf_vect_dot_prod() to 32 from 16.
|
||||
- Tests include both source and parity recovery with ec_encode_data().
|
||||
- New encoding examples with Vandermonde or Cauchy matrix.
|
||||
|
||||
v2.8
|
||||
|
||||
* First open release of erasure code unit that is part of ISA-L.
|
5429
erasure/docs/isa-l_open_src_2.10.pdf
Normal file
5429
erasure/docs/isa-l_open_src_2.10.pdf
Normal file
File diff suppressed because it is too large
Load Diff
6680
erasure/include/ec-base.h
Normal file
6680
erasure/include/ec-base.h
Normal file
File diff suppressed because it is too large
Load Diff
659
erasure/include/erasure-code.h
Normal file
659
erasure/include/erasure-code.h
Normal file
@ -0,0 +1,659 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
|
||||
#ifndef _ERASURE_CODE_H_
|
||||
#define _ERASURE_CODE_H_
|
||||
|
||||
/**
|
||||
* @file erasure-code.h
|
||||
* @brief Interface to functions supporting erasure code encode and decode.
|
||||
*
|
||||
* This file defines the interface to optimized functions used in erasure
|
||||
* codes. Encode and decode of erasures in GF(2^8) are made by calculating the
|
||||
* dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a
|
||||
* set of coefficients. Values for the coefficients are determined by the type
|
||||
* of erasure code. Using a general dot product means that any sequence of
|
||||
* coefficients may be used including erasure codes based on random
|
||||
* coefficients.
|
||||
* Multiple versions of dot product are supplied to calculate 1-6 output
|
||||
* vectors in one pass.
|
||||
* Base GF multiply and divide functions can be sped up by defining
|
||||
* GF_LARGE_TABLES at the expense of memory size.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gf-vect-mul.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Initialize tables for fast Erasure Code encode and decode.
|
||||
*
|
||||
* Generates the expanded tables needed for fast encode or decode for erasure
|
||||
* codes on blocks of data. 32 bytes are generated for each input coefficient.
|
||||
*
|
||||
* @param k The number of vector sources or rows in the generator matrix
|
||||
* for coding.
|
||||
* @param rows The number of output vectors to concurrently encode/decode.
|
||||
* @param a Pointer to sets of arrays of input coefficients used to encode
|
||||
* or decode data.
|
||||
* @param gftbls Pointer to start of space for concatenated output tables
|
||||
* generated from input coefficients. Must be of size 32*k*rows.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
/* Expands the k*rows input coefficients in a into the tables at gftbls. */
void ec_init_tables(int k,
                    int rows,
                    unsigned char *a,
                    unsigned char *gftbls);
|
||||
|
||||
/**
|
||||
* @brief Generate or decode erasure codes on blocks of data.
|
||||
*
|
||||
* Given a list of source data blocks, generate one or multiple blocks of
|
||||
* encoded data as specified by a matrix of GF(2^8) coefficients. When given a
|
||||
* suitable set of coefficients, this function will perform the fast generation
|
||||
* or decoding of Reed-Solomon type erasure codes.
|
||||
*
|
||||
* @requires SSE4.1
|
||||
* @param len Length of each block of data (vector) of source or dest data.
|
||||
* @param k The number of vector sources or rows in the generator matrix
|
||||
* for coding.
|
||||
* @param rows The number of output vectors to concurrently encode/decode.
|
||||
* @param gftbls Pointer to array of input tables generated from coding
|
||||
* coefficients in ec_init_tables(). Must be of size 32*k*rows
|
||||
* @param data Array of pointers to source input buffers.
|
||||
* @param coding Array of pointers to coded output buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
/* SSE4.1 encode/decode: fills the coding buffers from the data buffers. */
void ec_encode_data_sse(int len,
                        int k,
                        int rows,
                        unsigned char *gftbls,
                        unsigned char **data,
                        unsigned char **coding);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Generate or decode erasure codes on blocks of data, runs appropriate version.
|
||||
*
|
||||
* Given a list of source data blocks, generate one or multiple blocks of
|
||||
* encoded data as specified by a matrix of GF(2^8) coefficients. When given a
|
||||
* suitable set of coefficients, this function will perform the fast generation
|
||||
* or decoding of Reed-Solomon type erasure codes.
|
||||
*
|
||||
* This function determines what instruction sets are enabled and
|
||||
* selects the appropriate version at runtime.
|
||||
*
|
||||
* @param len Length of each block of data (vector) of source or dest data.
|
||||
* @param k The number of vector sources or rows in the generator matrix
|
||||
* for coding.
|
||||
* @param rows The number of output vectors to concurrently encode/decode.
|
||||
* @param gftbls Pointer to array of input tables generated from coding
|
||||
* coefficients in ec_init_tables(). Must be of size 32*k*rows
|
||||
* @param data Array of pointers to source input buffers.
|
||||
* @param coding Array of pointers to coded output buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
/* Runtime-dispatched encode/decode: fills the coding buffers from the data buffers. */
void ec_encode_data(int len,
                    int k,
                    int rows,
                    unsigned char *gftbls,
                    unsigned char **data,
                    unsigned char **coding);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Generate or decode erasure codes on blocks of data, runs baseline version.
|
||||
*
|
||||
* Given a list of source data blocks, generate one or multiple blocks of
|
||||
* encoded data as specified by a matrix of GF(2^8) coefficients. When given a
|
||||
* suitable set of coefficients, this function will perform the fast generation
|
||||
* or decoding of Reed-Solomon type erasure codes.
|
||||
*
|
||||
* @param len Length of each block of data (vector) of source or dest data.
|
||||
* @param srcs The number of vector sources or rows in the generator matrix
|
||||
* for coding.
|
||||
* @param dests The number of output vectors to concurrently encode/decode.
|
||||
* @param v Pointer to array of input tables generated from coding
|
||||
* coefficients in ec_init_tables(). Must be of size 32*srcs*dests
|
||||
* @param src Array of pointers to source input buffers.
|
||||
* @param dest Array of pointers to coded output buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
/* Baseline encode/decode: fills the dest buffers from the src buffers. */
void ec_encode_data_base(int len,
                         int srcs,
                         int dests,
                         unsigned char *v,
                         unsigned char **src,
                         unsigned char **dest);
|
||||
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product.
|
||||
*
|
||||
* Does a GF(2^8) dot product across each byte of the input array and a constant
|
||||
* set of coefficients to produce each byte of the output. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 32*vlen byte constant array based on the input coefficients.
|
||||
*
|
||||
* @requires SSE4.1
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
|
||||
* on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Pointer to destination data array.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char *dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product.
|
||||
*
|
||||
* Does a GF(2^8) dot product across each byte of the input array and a constant
|
||||
* set of coefficients to produce each byte of the output. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 32*vlen byte constant array based on the input coefficients.
|
||||
*
|
||||
* @requires AVX
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
|
||||
* on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Pointer to destination data array.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char *dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product.
|
||||
*
|
||||
* Does a GF(2^8) dot product across each byte of the input array and a constant
|
||||
* set of coefficients to produce each byte of the output. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 32*vlen byte constant array based on the input coefficients.
|
||||
*
|
||||
* @requires AVX2
|
||||
* @param len Length of each vector in bytes. Must be >= 32.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
|
||||
* on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Pointer to destination data array.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char *dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with two outputs.
|
||||
*
|
||||
* Vector dot product optimized to calculate two outputs at a time. Does two
|
||||
* GF(2^8) dot products across each byte of the input array and two constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 2*32*vlen byte constant array based on the two sets of input coefficients.
|
||||
*
|
||||
* @requires SSE4.1
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with two outputs.
|
||||
*
|
||||
* Vector dot product optimized to calculate two outputs at a time. Does two
|
||||
* GF(2^8) dot products across each byte of the input array and two constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 2*32*vlen byte constant array based on the two sets of input coefficients.
|
||||
*
|
||||
* @requires AVX
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with two outputs.
|
||||
*
|
||||
* Vector dot product optimized to calculate two outputs at a time. Does two
|
||||
* GF(2^8) dot products across each byte of the input array and two constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 2*32*vlen byte constant array based on the two sets of input coefficients.
|
||||
*
|
||||
* @requires AVX2
|
||||
* @param len Length of each vector in bytes. Must be >= 32.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with three outputs.
|
||||
*
|
||||
* Vector dot product optimized to calculate three outputs at a time. Does three
|
||||
* GF(2^8) dot products across each byte of the input array and three constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 3*32*vlen byte constant array based on the three sets of input coefficients.
|
||||
*
|
||||
* @requires SSE4.1
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with three outputs.
|
||||
*
|
||||
* Vector dot product optimized to calculate three outputs at a time. Does three
|
||||
* GF(2^8) dot products across each byte of the input array and three constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 3*32*vlen byte constant array based on the three sets of input coefficients.
|
||||
*
|
||||
* @requires AVX
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with three outputs.
|
||||
*
|
||||
* Vector dot product optimized to calculate three outputs at a time. Does three
|
||||
* GF(2^8) dot products across each byte of the input array and three constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 3*32*vlen byte constant array based on the three sets of input coefficients.
|
||||
*
|
||||
* @requires AVX2
|
||||
* @param len Length of each vector in bytes. Must be >= 32.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with four outputs.
|
||||
*
|
||||
* Vector dot product optimized to calculate four outputs at a time. Does four
|
||||
* GF(2^8) dot products across each byte of the input array and four constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 4*32*vlen byte constant array based on the four sets of input coefficients.
|
||||
*
|
||||
* @requires SSE4.1
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with four outputs.
|
||||
*
|
||||
* Vector dot product optimized to calculate four outputs at a time. Does four
|
||||
* GF(2^8) dot products across each byte of the input array and four constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 4*32*vlen byte constant array based on the four sets of input coefficients.
|
||||
*
|
||||
* @requires AVX
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with four outputs.
|
||||
*
|
||||
* Vector dot product optimized to calculate four outputs at a time. Does four
|
||||
* GF(2^8) dot products across each byte of the input array and four constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 4*32*vlen byte constant array based on the four sets of input coefficients.
|
||||
*
|
||||
* @requires AVX2
|
||||
* @param len Length of each vector in bytes. Must be >= 32.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with five outputs.
|
||||
*
|
||||
* Vector dot product optimized to calculate five outputs at a time. Does five
|
||||
* GF(2^8) dot products across each byte of the input array and five constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 5*32*vlen byte constant array based on the five sets of input coefficients.
|
||||
*
|
||||
* @requires SSE4.1
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with five outputs.
|
||||
*
|
||||
* Vector dot product optimized to calculate five outputs at a time. Does five
|
||||
* GF(2^8) dot products across each byte of the input array and five constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 5*32*vlen byte constant array based on the five sets of input coefficients.
|
||||
*
|
||||
* @requires AVX
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with five outputs.
|
||||
*
|
||||
* Vector dot product optimized to calculate five outputs at a time. Does five
|
||||
* GF(2^8) dot products across each byte of the input array and five constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 5*32*vlen byte constant array based on the five sets of input coefficients.
|
||||
*
|
||||
* @requires AVX2
|
||||
* @param len Length of each vector in bytes. Must be >= 32.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with six outputs.
|
||||
*
|
||||
* Vector dot product optimized to calculate six outputs at a time. Does six
|
||||
* GF(2^8) dot products across each byte of the input array and six constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 6*32*vlen byte constant array based on the six sets of input coefficients.
|
||||
*
|
||||
* @requires SSE4.1
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with six outputs.
|
||||
*
|
||||
* Vector dot product optimized to calculate six outputs at a time. Does six
|
||||
* GF(2^8) dot products across each byte of the input array and six constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 6*32*vlen byte constant array based on the six sets of input coefficients.
|
||||
*
|
||||
* @requires AVX
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product with six outputs.
|
||||
*
|
||||
* Vector dot product optimized to calculate six outputs at a time. Does six
|
||||
* GF(2^8) dot products across each byte of the input array and six constant
|
||||
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 6*32*vlen byte constant array based on the six sets of input coefficients.
|
||||
*
|
||||
* @requires AVX2
|
||||
* @param len Length of each vector in bytes. Must be >= 32.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
|
||||
* based on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Array of pointers to destination data buffers.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char **dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product, runs baseline version.
|
||||
*
|
||||
* Does a GF(2^8) dot product across each byte of the input array and a constant
|
||||
* set of coefficients to produce each byte of the output. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 32*vlen byte constant array based on the input coefficients.
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 16.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
|
||||
* on the array of input coefficients. Only elements 32*CONST*j + 1
|
||||
* of this array are used, where j = (0, 1, 2...) and CONST is the
|
||||
* number of elements in the array of input coefficients. The
|
||||
* elements used correspond to the original input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Pointer to destination data array.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char *dest);
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector dot product, runs appropriate version.
|
||||
*
|
||||
* Does a GF(2^8) dot product across each byte of the input array and a constant
|
||||
* set of coefficients to produce each byte of the output. Can be used for
|
||||
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||
* 32*vlen byte constant array based on the input coefficients.
|
||||
*
|
||||
* This function determines what instruction sets are enabled and
|
||||
* selects the appropriate version at runtime.
|
||||
*
|
||||
* @param len Length of each vector in bytes. Must be >= 32.
|
||||
* @param vlen Number of vector sources.
|
||||
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
|
||||
* on the array of input coefficients.
|
||||
* @param src Array of pointers to source inputs.
|
||||
* @param dest Pointer to destination data array.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls,
|
||||
unsigned char **src, unsigned char *dest);
|
||||
|
||||
/**********************************************************************
|
||||
* The remaining are lib support functions used in GF(2^8) operations.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Single element GF(2^8) multiply.
|
||||
*
|
||||
* @param a Multiplicand a
|
||||
* @param b Multiplicand b
|
||||
* @returns Product of a and b in GF(2^8)
|
||||
*/
|
||||
|
||||
unsigned char gf_mul(unsigned char a, unsigned char b);
|
||||
|
||||
/**
|
||||
* @brief Single element GF(2^8) inverse.
|
||||
*
|
||||
* @param a Input element
|
||||
* @returns Field element b such that a x b = {1}
|
||||
*/
|
||||
|
||||
unsigned char gf_inv(unsigned char a);
|
||||
|
||||
/**
|
||||
* @brief Generate a matrix of coefficients to be used for encoding.
|
||||
*
|
||||
* Vandermonde matrix example of encoding coefficients where high portion of
|
||||
* matrix is identity matrix I and lower portion is constructed as 2^{i*(j-k+1)}
|
||||
* i:{0,k-1} j:{k,m-1}. Commonly used method for choosing coefficients in
|
||||
* erasure encoding but does not guarantee invertibility of every sub-matrix. For
|
||||
* large k it is possible to find cases where the decode matrix chosen from
|
||||
* sources and parity not in erasure is not invertible. Users may want to
|
||||
* adjust for k > 5.
|
||||
*
|
||||
* @param a [mxk] array to hold coefficients
|
||||
* @param m number of rows in matrix corresponding to srcs + parity.
|
||||
* @param k number of columns in matrix corresponding to srcs.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_gen_rs_matrix(unsigned char *a, int m, int k);
|
||||
|
||||
/**
|
||||
* @brief Generate a Cauchy matrix of coefficients to be used for encoding.
|
||||
*
|
||||
* Cauchy matrix example of encoding coefficients where high portion of matrix
|
||||
* is identity matrix I and lower portion is constructed as 1/(i + j) | i != j,
|
||||
* i:{0,k-1} j:{k,m-1}. Any sub-matrix of a Cauchy matrix should be invertible.
|
||||
*
|
||||
* @param a [mxk] array to hold coefficients
|
||||
* @param m number of rows in matrix corresponding to srcs + parity.
|
||||
* @param k number of columns in matrix corresponding to srcs.
|
||||
* @returns none
|
||||
*/
|
||||
|
||||
void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k);
|
||||
|
||||
/**
|
||||
* @brief Invert a matrix in GF(2^8)
|
||||
*
|
||||
* @param in input matrix
|
||||
* @param out output matrix such that [in] x [out] = [I] - identity matrix
|
||||
* @param n size of matrix [nxn]
|
||||
* @returns 0 successful, other fail on singular input matrix
|
||||
*/
|
||||
|
||||
int gf_invert_matrix(unsigned char *in, unsigned char *out, const int n);
|
||||
|
||||
/*************************************************************/
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_ERASURE_CODE_H_
|
81
erasure/include/erasure/tests.h
Normal file
81
erasure/include/erasure/tests.h
Normal file
@ -0,0 +1,81 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
|
||||
#ifndef __ERASURE_TESTS_H
|
||||
#define __ERASURE_TESTS_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Use sys/time.h functions for time
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
/**
 * @brief Timestamp holder used by the perf_* benchmark helpers.
 */
struct perf {
	struct timeval tv;	/* wall-clock time captured by perf_start()/perf_stop() */
};

/*
 * The helpers below are `static inline` rather than plain `inline`.  A plain
 * `inline` definition in a header provides no external definition under
 * C99/C11 rules (a call that is not inlined, e.g. at -O0, fails to link) and
 * provides one external definition per translation unit under gnu89 rules
 * (duplicate symbols when the header is included twice).  Internal linkage is
 * correct under both.
 *
 * NOTE: perf_print() uses printf(); this header only includes <sys/time.h>,
 * so the including file must include <stdio.h> first.
 */

/**
 * @brief Record the current wall-clock time as the start of a measurement.
 *
 * @param p  Receives the timestamp.
 * @returns  The return value of gettimeofday() (0 on success).
 */
static inline int perf_start(struct perf *p)
{
	return gettimeofday(&p->tv, 0);
}

/**
 * @brief Record the current wall-clock time as the end of a measurement.
 *
 * @param p  Receives the timestamp.
 * @returns  The return value of gettimeofday() (0 on success).
 */
static inline int perf_stop(struct perf *p)
{
	return gettimeofday(&p->tv, 0);
}

/**
 * @brief Print the elapsed time between two timestamps and, optionally, the
 *        resulting bandwidth.
 *
 * @param stop   Timestamp taken with perf_stop().
 * @param start  Timestamp taken with perf_start().
 * @param dsize  Number of bytes processed between start and stop.  When 0,
 *               only the runtime is printed.
 * @returns none
 */
static inline void perf_print(struct perf stop, struct perf start, long long dsize)
{
	long long secs = stop.tv.tv_sec - start.tv.tv_sec;
	long long usecs = secs * 1000000 + stop.tv.tv_usec - start.tv.tv_usec;

	printf("runtime = %10lld usecs", usecs);

	if (dsize == 0) {
		printf("\n");
		return;
	}

	/* bytes per microsecond is numerically equal to MB/s (10^6 bytes/s) */
#if 1
	printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s\n", dsize/(1024*1024),
	       ((double) usecs)/1000000, ((double) dsize) / (double)usecs);
#else
	/*
	 * NOTE(review): same output split over three printf calls; apparently
	 * kept as a fallback for a 32-bit printf problem - confirm before
	 * removing.
	 */
	printf(", bandwidth %lld MB ", dsize/(1024*1024));
	printf("in %.4f sec ",(double)usecs/1000000);
	printf("= %.2f MB/s\n", (double)dsize/usecs);
#endif
}
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __ERASURE_TESTS_H
|
80
erasure/include/erasure/types.h
Normal file
80
erasure/include/erasure/types.h
Normal file
@ -0,0 +1,80 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
|
||||
/**
|
||||
* @file types.h
|
||||
* @brief Defines standard width types.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __ERASURE_TYPES_H
|
||||
#define __ERASURE_TYPES_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Fixed-width integer aliases (UINT64, INT64, UINT32, UINT16, UINT8).
 *
 * Non-unix toolchains use the compiler's __intN keywords.  On unix the
 * aliases map onto the basic integer types; `long` is used for the 64-bit
 * aliases when it is wider than 32 bits, and `long long` otherwise, so that
 * UINT64/INT64 stay 64 bits wide on ILP32 targets such as -m32 builds.
 */
#ifndef __unix__
#ifdef __MINGW32__
# include <_mingw.h>
#endif
typedef unsigned __int64 UINT64;
typedef __int64 INT64;
typedef unsigned __int32 UINT32;
typedef unsigned __int16 UINT16;
typedef unsigned char UINT8;
#else
# include <limits.h>	/* ULONG_MAX: detect the width of long */
# if ULONG_MAX > 0xffffffffUL
typedef unsigned long int UINT64;
typedef long int INT64;
# else
/* long is only 32 bits wide here */
typedef unsigned long long int UINT64;
typedef long long int INT64;
# endif
typedef unsigned int UINT32;
typedef unsigned short int UINT16;
typedef unsigned char UINT8;
#endif
|
||||
|
||||
|
||||
/*
 * Portability macros.
 *
 * DECLARE_ALIGNED(decl, alignval)  declare `decl` with `alignval`-byte
 *                                  alignment using the compiler's attribute
 *                                  syntax.
 * __forceinline                    (unix only) mapped to `static inline`.
 * posix_memalign(p, algn, len)     (non-unix only) shim over
 *                                  _aligned_malloc(); stores the pointer in
 *                                  *p and evaluates to 0 on success and
 *                                  non-zero when the allocation returned NULL.
 *
 * The shim casts the allocation to char* (the type of the lvalue it assigns
 * to) so that it also compiles when this header is used from C++, and
 * parenthesizes its arguments so that expressions can be passed safely.
 */
#ifdef __unix__
# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
# define __forceinline static inline
#else
# define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl
# define posix_memalign(p, algn, len) (NULL == (*((char**)(p)) = (char*) _aligned_malloc((len), (algn))))
#endif
|
||||
|
||||
/*
 * DEBUG_PRINT((fmt, ...)) - printf-style trace output.
 *
 * The argument is a complete, parenthesized printf argument list.  Without
 * DEBUG defined the macro expands to an empty statement and the argument is
 * not evaluated; with DEBUG defined it is handed to printf.
 */
#ifndef DEBUG
# define DEBUG_PRINT(x) do {} while (0)
#else
# define DEBUG_PRINT(x) printf x
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //__ERASURE_TYPES_H
|
148
erasure/include/gf-vect-mul.h
Normal file
148
erasure/include/gf-vect-mul.h
Normal file
@ -0,0 +1,148 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
|
||||
#ifndef _GF_VECT_MUL_H
|
||||
#define _GF_VECT_MUL_H
|
||||
|
||||
/**
|
||||
* @file gf-vect-mul.h
|
||||
* @brief Interface to functions for vector (block) multiplication in GF(2^8).
|
||||
*
|
||||
* This file defines the interface to routines used in fast RAID rebuild and
|
||||
* erasure codes.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply by constant.
|
||||
*
|
||||
* Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
|
||||
* is a single field element in GF(2^8). Can be used for RAID6 rebuild
|
||||
* and partial write functions. Function requires pre-calculation of a
|
||||
* 32-element constant array based on constant C. gftbl(C) = {C{00},
|
||||
* C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. Len
|
||||
* and src must be aligned to 32B.
|
||||
|
||||
* @requires SSE4.1
|
||||
* @param len Length of vector in bytes. Must be aligned to 32B.
|
||||
* @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
|
||||
* @param src Pointer to src data array. Must be aligned to 32B.
|
||||
* @param dest Pointer to destination data array. Must be aligned to 32B.
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int gf_vect_mul_sse(int len, unsigned char *gftbl, void *src, void *dest);
|
||||
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply by constant.
|
||||
*
|
||||
* Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
|
||||
* is a single field element in GF(2^8). Can be used for RAID6 rebuild
|
||||
* and partial write functions. Function requires pre-calculation of a
|
||||
* 32-element constant array based on constant C. gftbl(C) = {C{00},
|
||||
* C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. Len
|
||||
* and src must be aligned to 32B.
|
||||
|
||||
* @requires AVX
|
||||
* @param len Length of vector in bytes. Must be aligned to 32B.
|
||||
* @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
|
||||
* @param src Pointer to src data array. Must be aligned to 32B.
|
||||
* @param dest Pointer to destination data array. Must be aligned to 32B.
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int gf_vect_mul_avx(int len, unsigned char *gftbl, void *src, void *dest);
|
||||
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply by constant, runs appropriate version.
|
||||
*
|
||||
* Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
|
||||
* is a single field element in GF(2^8). Can be used for RAID6 rebuild
|
||||
* and partial write functions. Function requires pre-calculation of a
|
||||
* 32-element constant array based on constant C. gftbl(C) = {C{00},
|
||||
* C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }.
|
||||
* Len and src must be aligned to 32B.
|
||||
*
|
||||
* This function determines what instruction sets are enabled
|
||||
* and selects the appropriate version at runtime.
|
||||
*
|
||||
* @param len Length of vector in bytes. Must be aligned to 32B.
|
||||
* @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
|
||||
* @param src Pointer to src data array. Must be aligned to 32B.
|
||||
* @param dest Pointer to destination data array. Must be aligned to 32B.
|
||||
* @returns 0 pass, other fail
|
||||
*/
|
||||
|
||||
int gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Initialize 32-byte constant array for GF(2^8) vector multiply
|
||||
*
|
||||
* Calculates array {C{00}, C{01}, C{02}, ... , C{0f} }, {C{00}, C{10},
|
||||
* C{20}, ... , C{f0} } as required by other fast vector multiply
|
||||
* functions.
|
||||
* @param c Constant input.
|
||||
* @param gftbl Table output.
|
||||
*/
|
||||
|
||||
void gf_vect_mul_init(unsigned char c, unsigned char* gftbl);
|
||||
|
||||
|
||||
/**
|
||||
* @brief GF(2^8) vector multiply by constant, runs baseline version.
|
||||
*
|
||||
* Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
|
||||
* is a single field element in GF(2^8). Can be used for RAID6 rebuild
|
||||
* and partial write functions. Function requires pre-calculation of a
|
||||
* 32-element constant array based on constant C. gftbl(C) = {C{00},
|
||||
* C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. Len
|
||||
* and src must be aligned to 32B.
|
||||
*
|
||||
* @param len Length of vector in bytes. Must be aligned to 32B.
|
||||
* @param a Pointer to 32-byte array of pre-calculated constants based on C.
|
||||
* Only the 2nd element is used.
|
||||
* @param src Pointer to src data array. Must be aligned to 32B.
|
||||
* @param dest Pointer to destination data array. Must be aligned to 32B.
|
||||
*/
|
||||
|
||||
void gf_vect_mul_base(int len, unsigned char *a, unsigned char *src,
|
||||
unsigned char *dest);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif //_GF_VECT_MUL_H
|
96
erasure/include/reg-sizes.asm
Normal file
96
erasure/include/reg-sizes.asm
Normal file
@ -0,0 +1,96 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; Bit masks used for CPU feature detection.  Each FLAG_CPUID1_<reg>_<feature>
; name encodes the register the mask is meant to be tested against.
; EFLAGS_HAS_CPUID is bit 21 of EFLAGS (the ID flag in the Intel SDM).
; NOTE(review): AVX2 is reported by CPUID leaf 7 (EBX bit 5), not leaf 1,
; despite the CPUID1 prefix of FLAG_CPUID1_EBX_AVX2 - confirm that users of
; this mask query leaf 7.
%define EFLAGS_HAS_CPUID (1<<21)
|
||||
%define FLAG_CPUID1_ECX_CLMUL (1<<1)
|
||||
%define FLAG_CPUID1_EDX_SSE2 (1<<26)
|
||||
%define FLAG_CPUID1_ECX_SSE3 (1)
|
||||
%define FLAG_CPUID1_ECX_SSE4_1 (1<<19)
|
||||
%define FLAG_CPUID1_ECX_SSE4_2 (1<<20)
|
||||
%define FLAG_CPUID1_ECX_POPCNT (1<<23)
|
||||
%define FLAG_CPUID1_ECX_AESNI (1<<25)
|
||||
%define FLAG_CPUID1_ECX_OSXSAVE (1<<27)
|
||||
%define FLAG_CPUID1_ECX_AVX (1<<28)
|
||||
%define FLAG_CPUID1_EBX_AVX2 (1<<5)
|
||||
; 0x6 = bits 1 and 2; presumably the XMM and YMM state-enable bits returned
; in EAX by XGETBV, as the name suggests - confirm against the caller.
%define FLAG_XGETBV_EAX_XMM_YMM 0x6
|
||||
|
||||
; presumably the CPUID leaf 1 EAX signature identifying Avoton parts -
; TODO confirm how callers mask EAX before comparing.
%define FLAG_CPUID1_EAX_AVOTON 0x000406d0
|
||||
|
||||
; Define d (32-bit), w (16-bit) and b (8-bit) suffixed aliases for the
; general-purpose registers rax, rbx, rcx, rdx, rsi, rdi and rbp, so that a
; sub-register can be named by appending a suffix to the 64-bit register name.
|
||||
|
||||
%define raxd eax
|
||||
%define raxw ax
|
||||
%define raxb al
|
||||
|
||||
%define rbxd ebx
|
||||
%define rbxw bx
|
||||
%define rbxb bl
|
||||
|
||||
%define rcxd ecx
|
||||
%define rcxw cx
|
||||
%define rcxb cl
|
||||
|
||||
%define rdxd edx
|
||||
%define rdxw dx
|
||||
%define rdxb dl
|
||||
|
||||
%define rsid esi
|
||||
%define rsiw si
|
||||
%define rsib sil
|
||||
|
||||
%define rdid edi
|
||||
%define rdiw di
|
||||
%define rdib dil
|
||||
|
||||
%define rbpd ebp
|
||||
%define rbpw bp
|
||||
%define rbpb bpl
|
||||
|
||||
; x-suffixed aliases: ymmNx names the xmm register with the same number N.
%define ymm0x xmm0
|
||||
%define ymm1x xmm1
|
||||
%define ymm2x xmm2
|
||||
%define ymm3x xmm3
|
||||
%define ymm4x xmm4
|
||||
%define ymm5x xmm5
|
||||
%define ymm6x xmm6
|
||||
%define ymm7x xmm7
|
||||
%define ymm8x xmm8
|
||||
%define ymm9x xmm9
|
||||
%define ymm10x xmm10
|
||||
%define ymm11x xmm11
|
||||
%define ymm12x xmm12
|
||||
%define ymm13x xmm13
|
||||
%define ymm14x xmm14
|
||||
%define ymm15x xmm15
|
||||
|
||||
; Size-selection macros: each pastes a one-letter suffix onto its argument
; (%+ is the preprocessor's token-concatenation operator), producing one of
; the suffixed alias names, e.g. DWORD(rax) -> raxd -> eax and
; XWORD(ymm3) -> ymm3x -> xmm3.
%define DWORD(reg) reg %+ d
|
||||
%define WORD(reg) reg %+ w
|
||||
%define BYTE(reg) reg %+ b
|
||||
|
||||
%define XWORD(reg) reg %+ x
|
205
erasure/make.inc
Normal file
205
erasure/make.inc
Normal file
@ -0,0 +1,205 @@
|
||||
########################################################################
|
||||
# Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
########################################################################
|
||||
|
||||
|
||||
# Makefile include for optimized libraries
|
||||
# make targets:
|
||||
# lib - build library of optimized functions
|
||||
# slib - build shared library
|
||||
# test - run unit tests of functions
|
||||
# perf - run performance tests
|
||||
# sim - run on simulator
|
||||
# trace - get simulator trace
|
||||
# clean - remove object files
|
||||
|
||||
CC = gcc
|
||||
AS = yasm
|
||||
SIM = sde $(SIMFLAGS) --
|
||||
|
||||
DEBUG = -g
|
||||
DEBUG_yasm = -g dwarf2
|
||||
DEBUG_nasm = -g
|
||||
|
||||
# Default arch= build options
|
||||
CFLAGS_gcc = -Wall
|
||||
ASFLAGS_ = -f elf64
|
||||
ARFLAGS_ = cr $@
|
||||
STRIP_gcc = strip -d -R .comment $@
|
||||
STRIP_clang = strip -d $@
|
||||
|
||||
# arch=32 build options
|
||||
ASFLAGS_32 = -f elf32
|
||||
CFLAGS_32 = -m32
|
||||
ARFLAGS_32 = cr $@
|
||||
|
||||
# arch=win64 build options
|
||||
ASFLAGS_win64 = -f win64
|
||||
CFLAGS_icl = -Qstd=c99
|
||||
ARFLAGS_win64 = -out:$@
|
||||
|
||||
# arch=mingw build options
|
||||
ASFLAGS_mingw = -f win64
|
||||
ARFLAGS_mingw = cr $@
|
||||
lsrcmingw = $(lsrc)
|
||||
unit_testsmingw = $(unit_tests)
|
||||
examplesmingw = $(examples)
|
||||
perf_testsmingw = $(perf_tests)
|
||||
|
||||
ifeq ($(arch),mingw)
|
||||
CC=x86_64-w64-mingw32-gcc
|
||||
AR=x86_64-w64-mingw32-ar
|
||||
LDFLAGS = -Wl,--force-exe-suffix
|
||||
endif
|
||||
|
||||
|
||||
INCLUDE = $(patsubst %,-I%,$(subst :, ,$(VPATH)))
|
||||
CFLAGS = $(CFLAGS_$(arch)) $(CFLAGS_$(CC)) $(DEBUG) -O2 $(DEFINES) $(INCLUDE)
|
||||
ASFLAGS = $(ASFLAGS_$(arch)) $(ASFLAGS_$(CC)) $(DEBUG_$(AS)) $(DEFINES) $(INCLUDE)
|
||||
ARFLAGS = $(ARFLAGS_$(arch))
|
||||
DEFINES += $(addprefix -D , $D)
|
||||
|
||||
O = src
|
||||
lobj += $(patsubst %.c,%.o,$(patsubst %.asm,%.o,$(lsrc$(arch))))
|
||||
objs = $(addprefix $(O)/,$(lobj))
|
||||
|
||||
|
||||
lib_name ?= isa-l.a
|
||||
default: lib
|
||||
|
||||
# Defaults for windows build
|
||||
ifeq ($(arch),win64)
|
||||
AR=lib
|
||||
CC=cl
|
||||
OUTPUT_OPTION = -Fo$@
|
||||
DEBUG=
|
||||
lib_name := $(basename $(lib_name)).lib
|
||||
endif
|
||||
lsrcwin64 = $(lsrc)
|
||||
unit_testswin64 = $(unit_tests)
|
||||
exampleswin64 = $(examples)
|
||||
perf_testswin64 = $(perf_tests)
|
||||
|
||||
# Build and run unit tests, performance tests, etc.
|
||||
all_tests = $(sort $(perf_tests$(arch)) $(unit_tests$(arch)) $(examples$(arch)) $(other_tests))
|
||||
|
||||
$(sort $(unit_tests$(arch))): % : %.c $(tsrc$(arch)) $(lib_name)
|
||||
$(sort $(perf_tests$(arch))): % : %.c $(lib_name)
|
||||
$(sort $(examples$(arch))): % : %.c $(lib_name)
|
||||
$(sort $(other_tests)): % : %.c $(lib_name)
|
||||
|
||||
sim test trace: $(addsuffix .run,$(unit_tests$(arch)))
|
||||
perf: $(addsuffix .run,$(perf_tests$(arch)))
|
||||
ex: $(examples$(arch))
|
||||
all: lib $(all_tests)
|
||||
other: $(other_tests)
|
||||
tests: $(unit_tests$(arch))
|
||||
perfs: $(perf_tests$(arch))
|
||||
test perf: SIM=
|
||||
trace: SIMFLAGS = -debugtrace
|
||||
test sim:
|
||||
@echo Finished running tests
|
||||
|
||||
$(objs): | $(O)
|
||||
$(O): ; mkdir -p $(O)
|
||||
|
||||
|
||||
# Build rule to run tests
|
||||
%.run: %
|
||||
$(SIM) $(@D)/$<
|
||||
@echo Completed run: $<
|
||||
|
||||
# Other build rules
|
||||
msg = $(if $(DEBUG),DEBUG) $(patsubst 32,32-bit,$(arch)) $D
|
||||
|
||||
$(O)/%.o: %.asm
|
||||
@echo " ---> Building $< $(msg)"
|
||||
@$(AS) $(ASFLAGS) -o $@ $<
|
||||
|
||||
$(O)/%.o %.o: %.c
|
||||
@echo " ---> Building $< $(msg)"
|
||||
@$(COMPILE.c) $(OUTPUT_OPTION) $<
|
||||
|
||||
$(all_tests):
|
||||
@echo " ---> Building Test $@ $(msg)"
|
||||
@$(LINK.o) $(CFLAGS) $^ $(LDLIBS) -o $@
|
||||
|
||||
|
||||
# Target to build lib files
|
||||
lib: $(lib_name)
|
||||
ifneq ($(lib_debug),1)
|
||||
$(lib_name): DEBUG_$(AS)= # Don't put debug symbols in the lib
|
||||
$(lib_name): DEBUG=
|
||||
$(lib_name): DEFINES+=-D NDEBUG
|
||||
endif
|
||||
ifeq ($(lib_debug),1)
|
||||
DEBUG+=-D DEBUG # Define DEBUG for macros
|
||||
endif
|
||||
|
||||
#lib $(lib_name): $(lib_name)(${objs})
|
||||
# Archive the objects into the static library, then strip the library and the
# objects.  This file defines STRIP_<compiler> only for gcc and clang; for any
# other CC the variable is empty, and an unconditional "$(STRIP_$(CC)) $^"
# line would expand to just the object list and try to execute it.  The strip
# step is therefore emitted only when a strip command is defined.
$(lib_name): $(objs)
	@echo " ---> Creating Lib $@"
	@$(AR) $(ARFLAGS) $^
	$(if $(STRIP_$(CC)),@$(STRIP_$(CC)) $^)
|
||||
|
||||
# Target for shared lib
|
||||
so_lib_name ?= $(basename $(lib_name)).so
|
||||
slib: $(so_lib_name)
|
||||
aobjs += $(addprefix $(O)/,$(patsubst %.asm,%.o,$(filter %.asm,$(lsrc$(arch)))))
|
||||
shared_objs += $(addprefix $(O)/shared_ver_,$(patsubst %.c,%.o,$(filter %.c,$(lsrc$(arch)))))
|
||||
|
||||
$(O)/shared_ver_%.o: %.c
|
||||
@echo " ---> Building shared $< $(msg)"
|
||||
@$(COMPILE.c) $(OUTPUT_OPTION) $<
|
||||
|
||||
ifneq ($(lib_debug),1)
|
||||
$(so_lib_name): DEBUG_$(AS)=
|
||||
$(so_lib_name): DEBUG=
|
||||
$(so_lib_name): DEFINES+=-D NDEBUG
|
||||
endif
|
||||
|
||||
$(shared_objs): CFLAGS += -fPIC
|
||||
$(shared_objs) $(aobjs): | $(O)
|
||||
# Link the -fPIC C objects and the assembled objects into the shared library,
# then strip the library and the objects.  This file defines STRIP_<compiler>
# only for gcc and clang; for any other CC the variable is empty, and an
# unconditional "$(STRIP_$(CC)) $^" line would expand to just the object list
# and try to execute it.  The strip step is therefore emitted only when a
# strip command is defined.
$(so_lib_name): $(shared_objs) $(aobjs)
	@echo " ---> Creating Shared Lib $@"
	@$(CC) $(CFLAGS) -shared $(LDFLAGS) -o $@ $^
	$(if $(STRIP_$(CC)),@$(STRIP_$(CC)) $^)
|
||||
|
||||
# Collect performance data
|
||||
rpt_name = perf_report_$(shell uname -n)_$(shell date +%y%m%d).perf
|
||||
|
||||
perf_report:
|
||||
echo Results for $(rpt_name) >> $(rpt_name)
|
||||
$(MAKE) -k perf | tee -a $(rpt_name)
|
||||
@echo Summary:
|
||||
-grep runtime $(rpt_name)
|
||||
|
||||
|
||||
clean:
|
||||
@echo Cleaning up
|
||||
@$(RM) -r $(O)/*.o *.a $(all_tests) $(lib_name) $(so_lib_name)
|
107
erasure/src/Makefile
Normal file
107
erasure/src/Makefile
Normal file
@ -0,0 +1,107 @@
|
||||
########################################################################
|
||||
# Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
########################################################################
|
||||
|
||||
|
||||
lib_name := erasure_code.a
|
||||
|
||||
lsrc += ec-highlevel-func.c \
|
||||
ec-base.c \
|
||||
gf-vect-mul-sse.asm \
|
||||
gf-vect-mul-avx.asm \
|
||||
gf-vect-dot-prod-sse.asm \
|
||||
gf-vect-dot-prod-avx.asm \
|
||||
gf-vect-dot-prod-avx2.asm \
|
||||
gf-2vect-dot-prod-sse.asm \
|
||||
gf-3vect-dot-prod-sse.asm \
|
||||
gf-4vect-dot-prod-sse.asm \
|
||||
gf-5vect-dot-prod-sse.asm \
|
||||
gf-6vect-dot-prod-sse.asm \
|
||||
gf-2vect-dot-prod-avx.asm \
|
||||
gf-3vect-dot-prod-avx.asm \
|
||||
gf-4vect-dot-prod-avx.asm \
|
||||
gf-5vect-dot-prod-avx.asm \
|
||||
gf-6vect-dot-prod-avx.asm \
|
||||
gf-2vect-dot-prod-avx2.asm \
|
||||
gf-3vect-dot-prod-avx2.asm \
|
||||
gf-4vect-dot-prod-avx2.asm \
|
||||
gf-5vect-dot-prod-avx2.asm \
|
||||
gf-6vect-dot-prod-avx2.asm \
|
||||
ec-multibinary.asm
|
||||
|
||||
lsrc32 += ec-highlevel-func.c ec-multibinary.asm ec-base.c
|
||||
|
||||
unit_tests32 += erasure-code-base-test erasure-code-test \
|
||||
gf-vect-mul-test gf-vect-mul-base-test \
|
||||
gf-vect-dot-prod-base-test gf-vect-dot-prod-test
|
||||
|
||||
perf_tests32 += gf-vect-mul-perf gf-vect-dot-prod-perf erasure-code-perf \
|
||||
erasure-code-base-perf gf-vect-dot-prod-1tbl
|
||||
|
||||
# Public headers exported with the library.  Names must match the files on
# disk: ec-base.c includes "ec-base.h" and "erasure/types.h".
extern_hdrs +=	erasure-code.h ec-base.h gf-vect-mul.h \
		erasure/tests.h erasure/types.h
|
||||
|
||||
unit_tests += gf-vect-mul-test \
|
||||
gf-vect-mul-sse-test \
|
||||
gf-vect-mul-avx-test \
|
||||
gf-vect-mul-base-test \
|
||||
gf-vect-dot-prod-sse-test \
|
||||
gf-vect-dot-prod-avx-test \
|
||||
gf-2vect-dot-prod-sse-test \
|
||||
gf-3vect-dot-prod-sse-test \
|
||||
gf-4vect-dot-prod-sse-test \
|
||||
gf-5vect-dot-prod-sse-test \
|
||||
gf-6vect-dot-prod-sse-test \
|
||||
gf-inverse-test \
|
||||
gf-vect-dot-prod-base-test \
|
||||
gf-vect-dot-prod-test \
|
||||
erasure-code-test \
|
||||
erasure-code-base-test \
|
||||
erasure-code-sse-test
|
||||
|
||||
perf_tests += gf-vect-mul-perf \
|
||||
gf-vect-mul-sse-perf \
|
||||
gf-vect-mul-avx-perf \
|
||||
gf-vect-dot-prod-sse-perf \
|
||||
gf-vect-dot-prod-avx-perf \
|
||||
gf-2vect-dot-prod-sse-perf \
|
||||
gf-3vect-dot-prod-sse-perf \
|
||||
gf-4vect-dot-prod-sse-perf \
|
||||
gf-5vect-dot-prod-sse-perf \
|
||||
gf-6vect-dot-prod-sse-perf \
|
||||
gf-vect-dot-prod-perf \
|
||||
gf-vect-dot-prod-1tbl \
|
||||
erasure-code-perf \
|
||||
erasure-code-base-perf \
|
||||
erasure-code-sse-perf
|
||||
|
||||
other_src += reg-sizes.asm
|
||||
|
||||
VPATH = .. ../include
|
||||
|
||||
-include ../make.inc
|
320
erasure/src/ec-base.c
Normal file
320
erasure/src/ec-base.c
Normal file
@ -0,0 +1,320 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <limits.h>
|
||||
#include <string.h> // for memset
|
||||
#include "erasure-code.h"
|
||||
#include "ec-base.h" // for GF tables
|
||||
#include "erasure/types.h"
|
||||
|
||||
unsigned char gf_mul(unsigned char a, unsigned char b)
|
||||
{
|
||||
#ifndef GF_LARGE_TABLES
|
||||
int i;
|
||||
|
||||
if ((a == 0) || (b == 0))
|
||||
return 0;
|
||||
|
||||
return gff_base[(i = gflog_base[a] + gflog_base[b]) > 254 ? i - 255 : i];
|
||||
#else
|
||||
return gf_mul_table_base[b * 256 + a];
|
||||
#endif
|
||||
}
|
||||
|
||||
unsigned char gf_inv(unsigned char a)
|
||||
{
|
||||
#ifndef GF_LARGE_TABLES
|
||||
if (a == 0)
|
||||
return 0;
|
||||
|
||||
return gff_base[255 - gflog_base[a]];
|
||||
#else
|
||||
return gf_inv_table_base[a];
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Fill the m x k row-major matrix `a` with an encoding matrix.
 *
 * Rows 0..k-1 form the identity.  Each following row holds the successive
 * powers 1, g, g^2, ... of a per-row generator g; g starts at 1 for row k
 * and is multiplied by 2 (in the field) for every further row.
 */
void gf_gen_rs_matrix(unsigned char *a, int m, int k)
{
	int row, col;
	unsigned char generator = 1;

	memset(a, 0, k * m);

	// Identity block on top
	for (row = 0; row < k; row++)
		a[k * row + row] = 1;

	// Power rows underneath
	for (row = k; row < m; row++) {
		unsigned char power = 1;

		col = 0;
		while (col < k) {
			a[k * row + col] = power;
			power = gf_mul(power, generator);
			col++;
		}
		generator = gf_mul(generator, 2);
	}
}
|
||||
|
||||
/*
 * Fill the m x k row-major matrix `a` with an identity block followed by
 * Cauchy-style rows.
 *
 * Rows 0..k-1 are the identity.  For rows i >= k, element (i, j) is
 * gf_inv(i ^ j).
 */
void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k)
{
	int row, col;

	// Identity matrix in high position
	memset(a, 0, k * m);
	for (row = 0; row < k; row++)
		a[k * row + row] = 1;

	// For the rest choose 1/(i + j) | i != j
	for (row = k; row < m; row++)
		for (col = 0; col < k; col++)
			a[k * row + col] = gf_inv(row ^ col);
}
|
||||
|
||||
/*
 * Invert an n x n row-major matrix over the field by Gauss-Jordan
 * elimination.
 *
 * in_mat  - matrix to invert; it is overwritten during elimination.
 * out_mat - receives the inverse (starts as identity and mirrors every row
 *           operation applied to in_mat).
 * Returns 0 on success, -1 when no non-zero pivot can be found for some
 * column (singular matrix).
 */
int gf_invert_matrix(unsigned char *in_mat, unsigned char *out_mat, const int n)
{
	int piv, row, col;
	unsigned char t;

	// Start out_mat as the identity matrix
	memset(out_mat, 0, n * n);
	for (piv = 0; piv < n; piv++)
		out_mat[piv * n + piv] = 1;

	for (piv = 0; piv < n; piv++) {
		// A zero pivot needs a row swap with a later row
		if (in_mat[piv * n + piv] == 0) {
			row = piv + 1;
			while (row < n && in_mat[row * n + piv] == 0)
				row++;

			if (row == n)	// nothing usable below: singular
				return -1;

			for (col = 0; col < n; col++) {	// swap rows piv,row
				t = in_mat[piv * n + col];
				in_mat[piv * n + col] = in_mat[row * n + col];
				in_mat[row * n + col] = t;

				t = out_mat[piv * n + col];
				out_mat[piv * n + col] = out_mat[row * n + col];
				out_mat[row * n + col] = t;
			}
		}

		// Scale the pivot row so the pivot becomes 1
		t = gf_inv(in_mat[piv * n + piv]);
		for (col = 0; col < n; col++) {
			in_mat[piv * n + col] = gf_mul(in_mat[piv * n + col], t);
			out_mat[piv * n + col] = gf_mul(out_mat[piv * n + col], t);
		}

		// Clear the pivot column in every other row
		for (row = 0; row < n; row++) {
			if (row != piv) {
				t = in_mat[row * n + piv];
				for (col = 0; col < n; col++) {
					out_mat[row * n + col] ^= gf_mul(t, out_mat[piv * n + col]);
					in_mat[row * n + col] ^= gf_mul(t, in_mat[piv * n + col]);
				}
			}
		}
	}
	return 0;
}
|
||||
|
||||
// Calculates const table gftbl in GF(2^8) from single input A
// gftbl(A) = {A{00}, A{01}, A{02}, ... , A{0f} }, {A{00}, A{10}, A{20}, ... , A{f0} }
//
// c   - the constant A.
// tbl - receives 32 bytes: tbl[i] = A * i for the low nibble i = 0..15 and
//       tbl[16 + i] = A * (i << 4) for the high nibble.
//
// The table is written one byte at a time, so tbl needs no particular
// alignment and the result does not depend on host byte order.  (Storing
// 64-bit words through a cast of tbl would break both properties and the
// strict-aliasing rule.)
void gf_vect_mul_init(unsigned char c, unsigned char *tbl)
{
	unsigned char pw[8];	// pw[b] = c * {2^b}
	int i, b;

	// Repeated multiplication by {02}: shift left, reduce with 0x1d
	pw[0] = c;
	for (b = 1; b < 8; b++)
		pw[b] = (unsigned char)((pw[b - 1] << 1) ^ ((pw[b - 1] & 0x80) ? 0x1d : 0));

	// Multiplication distributes over XOR, so each entry is the XOR of
	// the pw[] values selected by the set bits of the nibble.
	for (i = 0; i < 16; i++) {
		unsigned char lo = 0, hi = 0;

		for (b = 0; b < 4; b++) {
			if (i & (1 << b)) {
				lo ^= pw[b];
				hi ^= pw[b + 4];
			}
		}
		tbl[i] = lo;
		tbl[16 + i] = hi;
	}
}
|
||||
|
||||
/*
 * Reference dot product over the field.
 *
 * For every byte position p in 0..len-1:
 *     dest[p] = XOR over j in 0..vlen-1 of gf_mul(src[j][p], coeff_j)
 * where coeff_j is read from v[j * 32 + 1], i.e. byte 1 of the j-th
 * 32-byte table in v.
 */
void gf_vect_dot_prod_base(int len, int vlen, unsigned char *v,
			   unsigned char **src, unsigned char *dest)
{
	int pos, vec;

	for (pos = 0; pos < len; pos++) {
		unsigned char acc = 0;

		vec = 0;
		while (vec < vlen) {
			acc ^= gf_mul(src[vec][pos], v[vec * 32 + 1]);
			vec++;
		}
		dest[pos] = acc;
	}
}
|
||||
|
||||
/*
 * Reference encoder: produce `dests` output buffers from `srcs` inputs.
 *
 * v holds one 32-byte table per (output, source) pair, output-major; the
 * coefficient used for output l and source j is byte 1 of that table,
 * v[(l * srcs + j) * 32 + 1].  Each output byte is the XOR of the field
 * products of the source bytes with their coefficients.
 */
void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v,
			 unsigned char **src, unsigned char **dest)
{
	int out, pos, in;

	for (out = 0; out < dests; out++) {
		const int row_off = out * srcs * 32;	// start of this output's tables

		for (pos = 0; pos < len; pos++) {
			unsigned char acc = 0;

			for (in = 0; in < srcs; in++)
				acc ^= gf_mul(src[in][pos], v[in * 32 + row_off + 1]);

			dest[out][pos] = acc;
		}
	}
}
|
||||
|
||||
/*
 * Reference vector multiply: dest[i] = gf_mul(c, src[i]) for i in 0..len-1.
 *
 * `a` is a multiplication table; only a[1] is read from it here.
 */
void gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest)
{
	//2nd element of table array is ref value used to fill it in
	const unsigned char coeff = a[1];
	int i;

	for (i = 0; i < len; i++)
		dest[i] = gf_mul(coeff, src[i]);
}
|
||||
|
||||
struct slver {
|
||||
UINT16 snum;
|
||||
UINT8 ver;
|
||||
UINT8 core;
|
||||
};
|
||||
|
||||
// Version info
|
||||
struct slver gf_vect_mul_init_slver_00020035;
|
||||
struct slver gf_vect_mul_init_slver = { 0x0035, 0x02, 0x00 };
|
||||
|
||||
struct slver ec_encode_data_base_slver_00010135;
|
||||
struct slver ec_encode_data_base_slver = { 0x0135, 0x01, 0x00 };
|
||||
|
||||
struct slver gf_vect_mul_base_slver_00010136;
|
||||
struct slver gf_vect_mul_base_slver = { 0x0136, 0x01, 0x00 };
|
||||
|
||||
struct slver gf_vect_dot_prod_base_slver_00010137;
|
||||
struct slver gf_vect_dot_prod_base_slver = { 0x0137, 0x01, 0x00 };
|
152
erasure/src/ec-highlevel-func.c
Normal file
152
erasure/src/ec-highlevel-func.c
Normal file
@ -0,0 +1,152 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
#include <limits.h>
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/types.h"
|
||||
|
||||
/*
 * Expand a rows x k coefficient matrix into multiplication tables.
 *
 * The coefficients are read from `a` in order; for each one
 * gf_vect_mul_init() is called and the output pointer advances by 32, so
 * g_tbls must have room for rows * k * 32 bytes.
 */
void ec_init_tables(int k, int rows, unsigned char *a, unsigned char *g_tbls)
{
	int r, c;

	for (r = 0; r < rows; r++)
		for (c = 0; c < k; c++, g_tbls += 32)
			gf_vect_mul_init(*a++, g_tbls);
}
|
||||
|
||||
#if __WORDSIZE == 64 || _WIN64 || __x86_64__
|
||||
/*
 * SSE encoder front end.
 *
 * Buffers shorter than 16 bytes go to ec_encode_data_base().  Otherwise
 * outputs are produced four at a time with gf_4vect_dot_prod_sse(), moving
 * g_tbls forward by 4 * k * 32 bytes per group, and the last 1-3 outputs
 * are handled by the matching narrower kernel.
 */
void ec_encode_data_sse(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
			unsigned char **coding)
{
	if (len < 16) {
		ec_encode_data_base(len, k, rows, g_tbls, data, coding);
		return;
	}

	// Full groups of four outputs
	for (; rows >= 4; rows -= 4, coding += 4, g_tbls += 4 * k * 32)
		gf_4vect_dot_prod_sse(len, k, g_tbls, data, coding);

	// Leftover outputs (nothing to do for 0)
	if (rows == 3)
		gf_3vect_dot_prod_sse(len, k, g_tbls, data, coding);
	else if (rows == 2)
		gf_2vect_dot_prod_sse(len, k, g_tbls, data, coding);
	else if (rows == 1)
		gf_vect_dot_prod_sse(len, k, g_tbls, data, *coding);
}
|
||||
|
||||
/*
 * AVX encoder front end.
 *
 * Buffers shorter than 16 bytes go to ec_encode_data_base().  Otherwise
 * outputs are produced four at a time with gf_4vect_dot_prod_avx(), moving
 * g_tbls forward by 4 * k * 32 bytes per group, and the last 1-3 outputs
 * are handled by the matching narrower kernel.
 */
void ec_encode_data_avx(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
			unsigned char **coding)
{
	if (len < 16) {
		ec_encode_data_base(len, k, rows, g_tbls, data, coding);
		return;
	}

	// Full groups of four outputs
	for (; rows >= 4; rows -= 4, coding += 4, g_tbls += 4 * k * 32)
		gf_4vect_dot_prod_avx(len, k, g_tbls, data, coding);

	// Leftover outputs (nothing to do for 0)
	if (rows == 3)
		gf_3vect_dot_prod_avx(len, k, g_tbls, data, coding);
	else if (rows == 2)
		gf_2vect_dot_prod_avx(len, k, g_tbls, data, coding);
	else if (rows == 1)
		gf_vect_dot_prod_avx(len, k, g_tbls, data, *coding);
}
|
||||
|
||||
/*
 * AVX2 encoder front end.
 *
 * Buffers shorter than 32 bytes go to ec_encode_data_base().  Otherwise
 * outputs are produced four at a time with gf_4vect_dot_prod_avx2(), moving
 * g_tbls forward by 4 * k * 32 bytes per group, and the last 1-3 outputs
 * are handled by the matching narrower kernel.
 */
void ec_encode_data_avx2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
			 unsigned char **coding)
{
	if (len < 32) {
		ec_encode_data_base(len, k, rows, g_tbls, data, coding);
		return;
	}

	// Full groups of four outputs
	for (; rows >= 4; rows -= 4, coding += 4, g_tbls += 4 * k * 32)
		gf_4vect_dot_prod_avx2(len, k, g_tbls, data, coding);

	// Leftover outputs (nothing to do for 0)
	if (rows == 3)
		gf_3vect_dot_prod_avx2(len, k, g_tbls, data, coding);
	else if (rows == 2)
		gf_2vect_dot_prod_avx2(len, k, g_tbls, data, coding);
	else if (rows == 1)
		gf_vect_dot_prod_avx2(len, k, g_tbls, data, *coding);
}
|
||||
|
||||
#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__
|
||||
|
||||
struct slver {
|
||||
UINT16 snum;
|
||||
UINT8 ver;
|
||||
UINT8 core;
|
||||
};
|
||||
|
||||
// Version info
|
||||
struct slver ec_init_tables_slver_00010068;
|
||||
struct slver ec_init_tables_slver = { 0x0068, 0x01, 0x00 };
|
||||
|
||||
struct slver ec_encode_data_sse_slver_00020069;
|
||||
struct slver ec_encode_data_sse_slver = { 0x0069, 0x02, 0x00 };
|
266
erasure/src/ec-multibinary.asm
Normal file
266
erasure/src/ec-multibinary.asm
Normal file
@ -0,0 +1,266 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define WRT_OPT wrt ..plt
|
||||
%else
|
||||
%define WRT_OPT
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf32
|
||||
|
||||
[bits 32]
|
||||
|
||||
%define def_wrd dd
|
||||
%define wrd_sz dword
|
||||
%define arg1 esi
|
||||
|
||||
%else
|
||||
|
||||
%include "reg-sizes.asm"
|
||||
default rel
|
||||
[bits 64]
|
||||
|
||||
%define def_wrd dq
|
||||
%define wrd_sz qword
|
||||
%define arg1 rsi
|
||||
|
||||
extern ec_encode_data_sse
|
||||
extern ec_encode_data_avx
|
||||
extern ec_encode_data_avx2
|
||||
extern gf_vect_mul_sse
|
||||
extern gf_vect_mul_avx
|
||||
extern gf_vect_dot_prod_sse
|
||||
extern gf_vect_dot_prod_avx
|
||||
extern gf_vect_dot_prod_avx2
|
||||
%endif
|
||||
|
||||
extern gf_vect_mul_base
|
||||
extern ec_encode_data_base
|
||||
extern gf_vect_dot_prod_base
|
||||
|
||||
section .data
|
||||
;;; *_mbinit are initial values for *_dispatched; is updated on first call.
|
||||
;;; Therefore, *_dispatch_init is only executed on first call.
|
||||
|
||||
ec_encode_data_dispatched:
|
||||
def_wrd ec_encode_data_mbinit
|
||||
|
||||
gf_vect_mul_dispatched:
|
||||
def_wrd gf_vect_mul_mbinit
|
||||
|
||||
gf_vect_dot_prod_dispatched:
|
||||
def_wrd gf_vect_dot_prod_mbinit
|
||||
|
||||
section .text
|
||||
;;;;
|
||||
; ec_encode_data multibinary function
|
||||
;;;;
|
||||
global ec_encode_data:function
|
||||
ec_encode_data_mbinit:
|
||||
call ec_encode_data_dispatch_init
|
||||
|
||||
ec_encode_data:
|
||||
jmp wrd_sz [ec_encode_data_dispatched]
|
||||
|
||||
ec_encode_data_dispatch_init:
|
||||
push arg1
|
||||
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
|
||||
lea arg1, [ec_encode_data_base]
|
||||
%else
|
||||
push rax
|
||||
push rbx
|
||||
push rcx
|
||||
push rdx
|
||||
lea arg1, [ec_encode_data_base WRT_OPT] ; Default
|
||||
|
||||
mov eax, 1
|
||||
cpuid
|
||||
lea rbx, [ec_encode_data_sse WRT_OPT]
|
||||
test ecx, FLAG_CPUID1_ECX_SSE4_1
|
||||
cmovne arg1, rbx
|
||||
|
||||
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
|
||||
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
|
||||
lea rbx, [ec_encode_data_avx WRT_OPT]
|
||||
|
||||
jne _done_ec_encode_data_init
|
||||
mov rsi, rbx
|
||||
|
||||
;; Try for AVX2
|
||||
xor ecx, ecx
|
||||
mov eax, 7
|
||||
cpuid
|
||||
test ebx, FLAG_CPUID1_EBX_AVX2
|
||||
lea rbx, [ec_encode_data_avx2 WRT_OPT]
|
||||
cmovne rsi, rbx
|
||||
|
||||
;; Does it have xmm and ymm support
|
||||
xor ecx, ecx
|
||||
xgetbv
|
||||
and eax, FLAG_XGETBV_EAX_XMM_YMM
|
||||
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
|
||||
je _done_ec_encode_data_init
|
||||
lea rsi, [ec_encode_data_sse WRT_OPT]
|
||||
|
||||
_done_ec_encode_data_init:
|
||||
pop rdx
|
||||
pop rcx
|
||||
pop rbx
|
||||
pop rax
|
||||
%endif ;; END 32-bit check
|
||||
mov [ec_encode_data_dispatched], arg1
|
||||
pop arg1
|
||||
ret
|
||||
|
||||
;;;;
|
||||
; gf_vect_mul multibinary function
|
||||
;;;;
|
||||
global gf_vect_mul:function
|
||||
gf_vect_mul_mbinit:
|
||||
call gf_vect_mul_dispatch_init
|
||||
|
||||
gf_vect_mul:
|
||||
jmp wrd_sz [gf_vect_mul_dispatched]
|
||||
|
||||
gf_vect_mul_dispatch_init:
|
||||
push arg1
|
||||
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
|
||||
lea arg1, [gf_vect_mul_base]
|
||||
%else
|
||||
push rax
|
||||
push rbx
|
||||
push rcx
|
||||
push rdx
|
||||
lea arg1, [gf_vect_mul_base WRT_OPT] ; Default
|
||||
|
||||
mov eax, 1
|
||||
cpuid
|
||||
test ecx, FLAG_CPUID1_ECX_SSE4_2
|
||||
lea rbx, [gf_vect_mul_sse WRT_OPT]
|
||||
je _done_gf_vect_mul_dispatch_init
|
||||
mov arg1, rbx
|
||||
|
||||
;; Try for AVX
|
||||
and ecx, (FLAG_CPUID1_ECX_OSXSAVE | FLAG_CPUID1_ECX_AVX)
|
||||
cmp ecx, (FLAG_CPUID1_ECX_OSXSAVE | FLAG_CPUID1_ECX_AVX)
|
||||
jne _done_gf_vect_mul_dispatch_init
|
||||
|
||||
;; Does it have xmm and ymm support
|
||||
xor ecx, ecx
|
||||
xgetbv
|
||||
and eax, FLAG_XGETBV_EAX_XMM_YMM
|
||||
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
|
||||
jne _done_gf_vect_mul_dispatch_init
|
||||
lea arg1, [gf_vect_mul_avx WRT_OPT]
|
||||
|
||||
_done_gf_vect_mul_dispatch_init:
|
||||
pop rdx
|
||||
pop rcx
|
||||
pop rbx
|
||||
pop rax
|
||||
%endif ;; END 32-bit check
|
||||
mov [gf_vect_mul_dispatched], arg1
|
||||
pop arg1
|
||||
ret
|
||||
|
||||
|
||||
;;;;
|
||||
; gf_vect_dot_prod multibinary function
|
||||
;;;;
|
||||
global gf_vect_dot_prod:function
|
||||
gf_vect_dot_prod_mbinit:
|
||||
call gf_vect_dot_prod_dispatch_init
|
||||
|
||||
gf_vect_dot_prod:
|
||||
jmp wrd_sz [gf_vect_dot_prod_dispatched]
|
||||
|
||||
gf_vect_dot_prod_dispatch_init:
|
||||
push arg1
|
||||
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
|
||||
lea arg1, [gf_vect_dot_prod_base]
|
||||
%else
|
||||
push rax
|
||||
push rbx
|
||||
push rcx
|
||||
push rdx
|
||||
lea arg1, [gf_vect_dot_prod_base WRT_OPT] ; Default
|
||||
|
||||
mov eax, 1
|
||||
cpuid
|
||||
lea rbx, [gf_vect_dot_prod_sse WRT_OPT]
|
||||
test ecx, FLAG_CPUID1_ECX_SSE4_1
|
||||
cmovne arg1, rbx
|
||||
|
||||
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
|
||||
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
|
||||
lea rbx, [gf_vect_dot_prod_avx WRT_OPT]
|
||||
|
||||
jne _done_gf_vect_dot_prod_init
|
||||
mov rsi, rbx
|
||||
|
||||
;; Try for AVX2
|
||||
xor ecx, ecx
|
||||
mov eax, 7
|
||||
cpuid
|
||||
test ebx, FLAG_CPUID1_EBX_AVX2
|
||||
lea rbx, [gf_vect_dot_prod_avx2 WRT_OPT]
|
||||
cmovne rsi, rbx
|
||||
|
||||
;; Does it have xmm and ymm support
|
||||
xor ecx, ecx
|
||||
xgetbv
|
||||
and eax, FLAG_XGETBV_EAX_XMM_YMM
|
||||
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
|
||||
je _done_gf_vect_dot_prod_init
|
||||
lea rsi, [gf_vect_dot_prod_sse WRT_OPT]
|
||||
|
||||
_done_gf_vect_dot_prod_init:
|
||||
pop rdx
|
||||
pop rcx
|
||||
pop rbx
|
||||
pop rax
|
||||
%endif ;; END 32-bit check
|
||||
mov [gf_vect_dot_prod_dispatched], arg1
|
||||
pop arg1
|
||||
ret
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
|
||||
;;; func core, ver, snum
|
||||
slversion ec_encode_data, 00, 02, 0133
|
||||
slversion gf_vect_mul, 00, 02, 0134
|
||||
slversion gf_vect_dot_prod, 00, 01, 0138
|
168
erasure/src/erasure-code-base-perf.c
Normal file
168
erasure/src/erasure-code-base-perf.c
Normal file
@ -0,0 +1,168 @@
|
||||
/**********************************************************************
|
||||
COPYRIGHT(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/tests.h"
|
||||
|
||||
// Test-size configuration.
//   TEST_SOURCES  - maximum number of buffers the test arrays are sized for
//   TEST_LEN(m)   - bytes per buffer for m buffers, rounded down to a
//                   multiple of 64
//   TEST_LOOPS(m) - number of timed iterations
//   TEST_TYPE_STR - suffix appended to the printed test name
// Macro arguments and multi-token bodies are parenthesized so the macros
// stay correct with expression arguments and inside larger expressions.
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES 32
# define TEST_LEN(m) ((128*1024 / (m)) & ~(64-1))
# define TEST_LOOPS(m) (100*(m))
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test. Pull from large mem base.
#  define TEST_SOURCES 32
#  define GT_L3_CACHE (32*1024*1024)	/* some number > last level cache */
#  define TEST_LEN(m) ((GT_L3_CACHE / (m)) & ~(64-1))
#  define TEST_LOOPS(m) (10)
#  define TEST_TYPE_STR "_cold"
# else
// Custom build: the other TEST_* values are expected from the build flags
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#   define TEST_LOOPS(m) 1000
#  endif
# endif
#endif
|
||||
|
||||
#define MMAX TEST_SOURCES
|
||||
#define KMAX TEST_SOURCES
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, rtest, m, k, nerrs, r;
|
||||
void *buf;
|
||||
u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
|
||||
u8 a[MMAX * KMAX], b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
|
||||
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||
struct perf start, stop;
|
||||
|
||||
// Pick test parameters
|
||||
m = 14;
|
||||
k = 10;
|
||||
nerrs = 4;
|
||||
const u8 err_list[] = {2, 4, 5, 7};
|
||||
|
||||
printf("erasure_code_base_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
|
||||
|
||||
if (m > MMAX || k > KMAX || nerrs > (m - k)){
|
||||
printf(" Input test parameter error\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
memcpy(src_err_list, err_list, nerrs);
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0; i < nerrs; i++)
|
||||
src_in_err[src_err_list[i]] = 1;
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < m; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("alloc error: Fail\n");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
for (i = 0; i < (m - k); i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("alloc error: Fail\n");
|
||||
return -1;
|
||||
}
|
||||
temp_buffs[i] = buf;
|
||||
}
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN(m); j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||
ec_encode_data_base(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
|
||||
// Start encode test
|
||||
perf_start(&start);
|
||||
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
|
||||
// Make parity vects
|
||||
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||
ec_encode_data_base(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf("erasure_code_base_encode" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)(TEST_LEN(m)) * (m) * rtest);
|
||||
|
||||
// Start decode test
|
||||
perf_start(&start);
|
||||
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
|
||||
// Construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r])
|
||||
r++;
|
||||
recov[i] = buffs[r];
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
|
||||
if (gf_invert_matrix(b, d, k) < 0) {
|
||||
printf("BAD MATRIX\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < nerrs; i++)
|
||||
for (j = 0; j < k; j++)
|
||||
c[k * i + j] = d[k * src_err_list[i] + j];
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, c, g_tbls);
|
||||
ec_encode_data_base(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
printf("erasure_code_base_decode" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)(TEST_LEN(m)) * (k + nerrs) * rtest);
|
||||
|
||||
printf("done all: Pass\n");
|
||||
return 0;
|
||||
}
|
764
erasure/src/erasure-code-base-test.c
Normal file
764
erasure/src/erasure-code-base-test.c
Normal file
@ -0,0 +1,764 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/types.h"
|
||||
|
||||
#define TEST_LEN 8192
|
||||
#define TEST_SIZE (TEST_LEN/2)
|
||||
|
||||
#ifndef TEST_SOURCES
|
||||
# define TEST_SOURCES 127
|
||||
#endif
|
||||
#ifndef RANDOMS
|
||||
# define RANDOMS 50
|
||||
#endif
|
||||
|
||||
#define MMAX TEST_SOURCES
|
||||
#define KMAX TEST_SOURCES
|
||||
|
||||
#define EFENCE_TEST_MIN_SIZE 16
|
||||
|
||||
#ifdef EC_ALIGNED_ADDR
|
||||
// Define power of 2 range to check ptr, len alignment
|
||||
# define PTR_ALIGN_CHK_B 0
|
||||
# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
|
||||
#else
|
||||
// Define power of 2 range to check ptr, len alignment
|
||||
# define PTR_ALIGN_CHK_B 32
|
||||
# define LEN_ALIGN_CHK_B 32 // 0 for aligned only
|
||||
#endif
|
||||
|
||||
#ifndef TEST_SEED
|
||||
#define TEST_SEED 11
|
||||
#endif
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
/*
 * Print len bytes of buf to stdout in hex, 32 bytes per line, followed by
 * a final newline.
 */
void dump(unsigned char *buf, int len)
{
	int pos = 0;

	while (pos < len) {
		printf(" %2x", buf[pos] & 0xff);
		pos++;
		// break the line after every 32nd byte
		if ((pos % 32) == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Print the first m bytes of each of the k buffers in s to stdout in hex,
 * one buffer per line, followed by a blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row;

	for (row = 0; row < k; row++) {
		const unsigned char *line = s[row];
		int col = 0;

		while (col < m)
			printf(" %2x", line[col++]);
		printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Print a k-row by m-column byte matrix stored row-major in s to stdout
 * in hex, one row per line, followed by a blank line.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		const unsigned char *line = s + (row * m);

		for (col = 0; col < m; col++)
			printf(" %2x", line[col] & 0xff);
		printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
// Generate Random errors
|
||||
static void gen_err_list(unsigned char *src_err_list,
|
||||
unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m)
|
||||
{
|
||||
int i, err;
|
||||
int nerrs = 0, nsrcerrs = 0;
|
||||
|
||||
for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err) {
|
||||
src_err_list[nerrs++] = i;
|
||||
if (i < k) {
|
||||
nsrcerrs++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (nerrs == 0) { // should have at least one error
|
||||
while ((err = (rand() % KMAX)) >= m) ;
|
||||
src_err_list[nerrs++] = err;
|
||||
src_in_err[err] = 1;
|
||||
if (err < k)
|
||||
nsrcerrs = 1;
|
||||
}
|
||||
*pnerrs = nerrs;
|
||||
*pnsrcerrs = nsrcerrs;
|
||||
return;
|
||||
}
|
||||
|
||||
#define NO_INVERT_MATRIX -2
|
||||
// Generate decode matrix from encode matrix
|
||||
static int gf_gen_decode_matrix(unsigned char *encode_matrix,
|
||||
unsigned char *decode_matrix,
|
||||
unsigned char *invert_matrix,
|
||||
unsigned int *decode_index,
|
||||
unsigned char *src_err_list,
|
||||
unsigned char *src_in_err,
|
||||
int nerrs, int nsrcerrs, int k, int m)
|
||||
{
|
||||
int i, j, p;
|
||||
int r;
|
||||
unsigned char *backup, *b, s;
|
||||
int incr = 0;
|
||||
|
||||
b = malloc(MMAX * KMAX);
|
||||
backup = malloc(MMAX * KMAX);
|
||||
|
||||
if (b == NULL || backup == NULL) {
|
||||
printf("Test failure! Error with malloc\n");
|
||||
free(b);
|
||||
free(backup);
|
||||
return -1;
|
||||
}
|
||||
// Construct matrix b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r])
|
||||
r++;
|
||||
for (j = 0; j < k; j++) {
|
||||
b[k * i + j] = encode_matrix[k * r + j];
|
||||
backup[k * i + j] = encode_matrix[k * r + j];
|
||||
}
|
||||
decode_index[i] = r;
|
||||
}
|
||||
incr = 0;
|
||||
while (gf_invert_matrix(b, invert_matrix, k) < 0) {
|
||||
if (nerrs == (m - k)) {
|
||||
free(b);
|
||||
free(backup);
|
||||
printf("BAD MATRIX\n");
|
||||
return NO_INVERT_MATRIX;
|
||||
}
|
||||
incr++;
|
||||
memcpy(b, backup, MMAX * KMAX);
|
||||
for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) {
|
||||
if (src_err_list[i] == (decode_index[k - 1] + incr)) {
|
||||
// skip the erased parity line
|
||||
incr++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (decode_index[k - 1] + incr >= m) {
|
||||
free(b);
|
||||
free(backup);
|
||||
printf("BAD MATRIX\n");
|
||||
return NO_INVERT_MATRIX;
|
||||
}
|
||||
decode_index[k - 1] += incr;
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j];
|
||||
|
||||
};
|
||||
|
||||
for (i = 0; i < nsrcerrs; i++) {
|
||||
for (j = 0; j < k; j++) {
|
||||
decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j];
|
||||
}
|
||||
}
|
||||
/* src_err_list from encode_matrix * invert of b for parity decoding */
|
||||
for (p = nsrcerrs; p < nerrs; p++) {
|
||||
for (i = 0; i < k; i++) {
|
||||
s = 0;
|
||||
for (j = 0; j < k; j++)
|
||||
s ^= gf_mul(invert_matrix[j * k + i],
|
||||
encode_matrix[k * src_err_list[p] + j]);
|
||||
|
||||
decode_matrix[k * p + i] = s;
|
||||
}
|
||||
}
|
||||
free(b);
|
||||
free(backup);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int re = 0;
|
||||
int i, j, p, rtest, m, k;
|
||||
int nerrs, nsrcerrs;
|
||||
void *buf;
|
||||
unsigned int decode_index[MMAX];
|
||||
unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
|
||||
unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls;
|
||||
unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES];
|
||||
unsigned char *recov[TEST_SOURCES];
|
||||
|
||||
int rows, align, size;
|
||||
unsigned char *efence_buffs[TEST_SOURCES];
|
||||
unsigned int offset;
|
||||
u8 *ubuffs[TEST_SOURCES];
|
||||
u8 *temp_ubuffs[TEST_SOURCES];
|
||||
|
||||
printf("erasure_code_base_test: %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||
srand(TEST_SEED);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
temp_buffs[i] = buf;
|
||||
}
|
||||
|
||||
// Test erasure code by encode and recovery
|
||||
|
||||
encode_matrix = malloc(MMAX * KMAX);
|
||||
decode_matrix = malloc(MMAX * KMAX);
|
||||
invert_matrix = malloc(MMAX * KMAX);
|
||||
g_tbls = malloc(KMAX * TEST_SOURCES * 32);
|
||||
if (encode_matrix == NULL || decode_matrix == NULL
|
||||
|| invert_matrix == NULL || g_tbls == NULL) {
|
||||
printf("Test failure! Error with malloc\n");
|
||||
return -1;
|
||||
}
|
||||
// Pick a first test
|
||||
m = 9;
|
||||
k = 5;
|
||||
if (m > MMAX || k > KMAX)
|
||||
return -1;
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// Generate encode matrix encode_matrix
|
||||
// The matrix generated by gf_gen_rs_matrix
|
||||
// is not always invertable.
|
||||
gf_gen_rs_matrix(encode_matrix, m, k);
|
||||
|
||||
// Generate g_tbls from encode matrix encode_matrix
|
||||
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||
|
||||
// Perform matrix dot_prod for EC encoding
|
||||
// using g_tbls from encode matrix encode_matrix
|
||||
ec_encode_data_base(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
|
||||
// Choose random buffers to be in erasure
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||
|
||||
// Generate decode matrix
|
||||
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||
invert_matrix, decode_index, src_err_list, src_in_err,
|
||||
nerrs, nsrcerrs, k, m);
|
||||
if (re != 0) {
|
||||
printf("Fail to gf_gen_decode_matrix\n");
|
||||
return -1;
|
||||
}
|
||||
// Pack recovery array as list of valid sources
|
||||
// Its order must be the same as the order
|
||||
// to generate matrix b in gf_gen_decode_matrix
|
||||
for (i = 0; i < k; i++) {
|
||||
recov[i] = buffs[decode_index[i]];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||
ec_encode_data_base(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (j = 0; j < nerrs; j++)
|
||||
printf(" %d", src_err_list[j]);
|
||||
printf(" - Index = ");
|
||||
for (p = 0; p < k; p++)
|
||||
printf(" %d", decode_index[p]);
|
||||
printf("\nencode_matrix:\n");
|
||||
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||
printf("\ndecode_matrix:\n");
|
||||
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buffs[k + i], 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Pick a first test
|
||||
m = 9;
|
||||
k = 5;
|
||||
if (m > MMAX || k > KMAX)
|
||||
return -1;
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// The matrix generated by gf_gen_cauchy1_matrix
|
||||
// is always invertable.
|
||||
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||
|
||||
// Generate g_tbls from encode matrix encode_matrix
|
||||
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||
|
||||
// Perform matrix dot_prod for EC encoding
|
||||
// using g_tbls from encode matrix encode_matrix
|
||||
ec_encode_data_base(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
|
||||
// Choose random buffers to be in erasure
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||
|
||||
// Generate decode matrix
|
||||
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||
invert_matrix, decode_index, src_err_list, src_in_err,
|
||||
nerrs, nsrcerrs, k, m);
|
||||
if (re != 0) {
|
||||
printf("Fail to gf_gen_decode_matrix\n");
|
||||
return -1;
|
||||
}
|
||||
// Pack recovery array as list of valid sources
|
||||
// Its order must be the same as the order
|
||||
// to generate matrix b in gf_gen_decode_matrix
|
||||
for (i = 0; i < k; i++) {
|
||||
recov[i] = buffs[decode_index[i]];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||
ec_encode_data_base(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (j = 0; j < nerrs; j++)
|
||||
printf(" %d", src_err_list[j]);
|
||||
printf(" - Index = ");
|
||||
for (p = 0; p < k; p++)
|
||||
printf(" %d", decode_index[p]);
|
||||
printf("\nencode_matrix:\n");
|
||||
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||
printf("\ndecode_matrix:\n");
|
||||
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buffs[k + i], 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Do more random tests
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
while ((m = (rand() % MMAX)) < 2) ;
|
||||
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||
|
||||
if (m > MMAX || k > KMAX)
|
||||
continue;
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// The matrix generated by gf_gen_cauchy1_matrix
|
||||
// is always invertable.
|
||||
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||
|
||||
// Make parity vects
|
||||
// Generate g_tbls from encode matrix a
|
||||
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||
// Perform matrix dot_prod for EC encoding
|
||||
// using g_tbls from encode matrix a
|
||||
ec_encode_data_base(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||
|
||||
// Generate decode matrix
|
||||
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||
invert_matrix, decode_index, src_err_list,
|
||||
src_in_err, nerrs, nsrcerrs, k, m);
|
||||
if (re != 0) {
|
||||
printf("Fail to gf_gen_decode_matrix\n");
|
||||
return -1;
|
||||
}
|
||||
// Pack recovery array as list of valid sources
|
||||
// Its order must be the same as the order
|
||||
// to generate matrix b in gf_gen_decode_matrix
|
||||
for (i = 0; i < k; i++) {
|
||||
recov[i] = buffs[decode_index[i]];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||
ec_encode_data_base(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (j = 0; j < nerrs; j++)
|
||||
printf(" %d", src_err_list[j]);
|
||||
printf(" - Index = ");
|
||||
for (p = 0; p < k; p++)
|
||||
printf(" %d", decode_index[p]);
|
||||
printf("\nencode_matrix:\n");
|
||||
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||
printf("\ndecode_matrix:\n");
|
||||
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||
printf("orig data:\n");
|
||||
dump_matrix(buffs, m, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buffs[k + i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
k = 16;
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
if (k > KMAX)
|
||||
return -1;
|
||||
|
||||
for (rows = 1; rows <= 16; rows++) {
|
||||
m = k + rows;
|
||||
if (m > MMAX)
|
||||
return -1;
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (size = EFENCE_TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||
for (i = 0; i < m; i++) { // Line up TEST_SIZE from end
|
||||
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||
}
|
||||
|
||||
// The matrix generated by gf_gen_cauchy1_matrix
|
||||
// is always invertable.
|
||||
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||
|
||||
// Make parity vects
|
||||
// Generate g_tbls from encode matrix a
|
||||
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||
// Perform matrix dot_prod for EC encoding
|
||||
// using g_tbls from encode matrix a
|
||||
ec_encode_data_base(size, k, m - k, g_tbls, efence_buffs,
|
||||
&efence_buffs[k]);
|
||||
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||
|
||||
// Generate decode matrix
|
||||
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||
invert_matrix, decode_index, src_err_list,
|
||||
src_in_err, nerrs, nsrcerrs, k, m);
|
||||
if (re != 0) {
|
||||
printf("Fail to gf_gen_decode_matrix\n");
|
||||
return -1;
|
||||
}
|
||||
// Pack recovery array as list of valid sources
|
||||
// Its order must be the same as the order
|
||||
// to generate matrix b in gf_gen_decode_matrix
|
||||
for (i = 0; i < k; i++) {
|
||||
recov[i] = efence_buffs[decode_index[i]];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||
ec_encode_data_base(size, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
if (0 !=
|
||||
memcmp(temp_buffs[k + i], efence_buffs[src_err_list[i]],
|
||||
size)) {
|
||||
printf("Efence: Fail error recovery (%d, %d, %d)\n", m,
|
||||
k, nerrs);
|
||||
|
||||
printf("size = %d\n", size);
|
||||
|
||||
printf("Test erase list = ");
|
||||
for (j = 0; j < nerrs; j++)
|
||||
printf(" %d", src_err_list[j]);
|
||||
printf(" - Index = ");
|
||||
for (p = 0; p < k; p++)
|
||||
printf(" %d", decode_index[p]);
|
||||
printf("\nencode_matrix:\n");
|
||||
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||
printf("\ndecode_matrix:\n");
|
||||
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buffs[k + i], align);
|
||||
printf("orig :");
|
||||
dump(efence_buffs[src_err_list[i]], align);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Test rand ptr alignment if available
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
while ((m = (rand() % MMAX)) < 2) ;
|
||||
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||
|
||||
if (m > MMAX || k > KMAX)
|
||||
continue;
|
||||
|
||||
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15;
|
||||
|
||||
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||
// Add random offsets
|
||||
for (i = 0; i < m; i++) {
|
||||
memset(buffs[i], 0, TEST_LEN); // zero pad to check write-over
|
||||
memset(temp_buffs[i], 0, TEST_LEN); // zero pad to check write-over
|
||||
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
temp_ubuffs[i] = temp_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
}
|
||||
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
ubuffs[i][j] = rand();
|
||||
|
||||
// The matrix generated by gf_gen_cauchy1_matrix
|
||||
// is always invertable.
|
||||
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||
|
||||
// Make parity vects
|
||||
// Generate g_tbls from encode matrix a
|
||||
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||
// Perform matrix dot_prod for EC encoding
|
||||
// using g_tbls from encode matrix a
|
||||
ec_encode_data_base(size, k, m - k, g_tbls, ubuffs, &ubuffs[k]);
|
||||
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||
|
||||
// Generate decode matrix
|
||||
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||
invert_matrix, decode_index, src_err_list,
|
||||
src_in_err, nerrs, nsrcerrs, k, m);
|
||||
if (re != 0) {
|
||||
printf("Fail to gf_gen_decode_matrix\n");
|
||||
return -1;
|
||||
}
|
||||
// Pack recovery array as list of valid sources
|
||||
// Its order must be the same as the order
|
||||
// to generate matrix b in gf_gen_decode_matrix
|
||||
for (i = 0; i < k; i++) {
|
||||
recov[i] = ubuffs[decode_index[i]];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||
ec_encode_data_base(size, k, nerrs, g_tbls, recov, &temp_ubuffs[k]);
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
if (0 != memcmp(temp_ubuffs[k + i], ubuffs[src_err_list[i]], size)) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (j = 0; j < nerrs; j++)
|
||||
printf(" %d", src_err_list[j]);
|
||||
printf(" - Index = ");
|
||||
for (p = 0; p < k; p++)
|
||||
printf(" %d", decode_index[p]);
|
||||
printf("\nencode_matrix:\n");
|
||||
dump_u8xu8((unsigned char *)encode_matrix, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((unsigned char *)invert_matrix, k, k);
|
||||
printf("\ndecode_matrix:\n");
|
||||
dump_u8xu8((unsigned char *)decode_matrix, m, k);
|
||||
printf("orig data:\n");
|
||||
dump_matrix(ubuffs, m, 25);
|
||||
printf("orig :");
|
||||
dump(ubuffs[src_err_list[i]], 25);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_ubuffs[k + i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Confirm that padding around dests is unchanged
|
||||
memset(temp_buffs[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||
|
||||
for (i = 0; i < m; i++) {
|
||||
|
||||
offset = ubuffs[i] - buffs[i];
|
||||
|
||||
if (memcmp(buffs[i], temp_buffs[0], offset)) {
|
||||
printf("Fail rand ualign encode pad start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp
|
||||
(buffs[i] + offset + size, temp_buffs[0],
|
||||
PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign encode pad end\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
offset = temp_ubuffs[k + i] - temp_buffs[k + i];
|
||||
if (memcmp(temp_buffs[k + i], temp_buffs[0], offset)) {
|
||||
printf("Fail rand ualign decode pad start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp
|
||||
(temp_buffs[k + i] + offset + size, temp_buffs[0],
|
||||
PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign decode pad end\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test size alignment
|
||||
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 13 : 16;
|
||||
|
||||
for (size = TEST_LEN; size > 0; size -= align) {
|
||||
while ((m = (rand() % MMAX)) < 2) ;
|
||||
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||
|
||||
if (m > MMAX || k > KMAX)
|
||||
continue;
|
||||
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// The matrix generated by gf_gen_cauchy1_matrix
|
||||
// is always invertable.
|
||||
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||
|
||||
// Make parity vects
|
||||
// Generate g_tbls from encode matrix a
|
||||
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||
// Perform matrix dot_prod for EC encoding
|
||||
// using g_tbls from encode matrix a
|
||||
ec_encode_data_base(size, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||
// Generate decode matrix
|
||||
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||
invert_matrix, decode_index, src_err_list,
|
||||
src_in_err, nerrs, nsrcerrs, k, m);
|
||||
if (re != 0) {
|
||||
printf("Fail to gf_gen_decode_matrix\n");
|
||||
return -1;
|
||||
}
|
||||
// Pack recovery array as list of valid sources
|
||||
// Its order must be the same as the order
|
||||
// to generate matrix b in gf_gen_decode_matrix
|
||||
for (i = 0; i < k; i++) {
|
||||
recov[i] = buffs[decode_index[i]];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||
ec_encode_data_base(size, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], size)) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (j = 0; j < nerrs; j++)
|
||||
printf(" %d", src_err_list[j]);
|
||||
printf(" - Index = ");
|
||||
for (p = 0; p < k; p++)
|
||||
printf(" %d", decode_index[p]);
|
||||
printf("\nencode_matrix:\n");
|
||||
dump_u8xu8((unsigned char *)encode_matrix, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((unsigned char *)invert_matrix, k, k);
|
||||
printf("\ndecode_matrix:\n");
|
||||
dump_u8xu8((unsigned char *)decode_matrix, m, k);
|
||||
printf("orig data:\n");
|
||||
dump_matrix(buffs, m, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buffs[k + i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
printf("done EC tests: Pass\n");
|
||||
return 0;
|
||||
}
|
168
erasure/src/erasure-code-perf.c
Normal file
168
erasure/src/erasure-code-perf.c
Normal file
@ -0,0 +1,168 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/tests.h"
|
||||
|
||||
//#define CACHED_TEST
|
||||
/*
 * Benchmark sizing.
 *
 *   CACHED_TEST  - small working set, many loops ("_warm")
 *   default      - working set larger than the last-level cache ("_cold")
 *   TEST_CUSTOM  - the builder supplies TEST_SOURCES and TEST_LEN(m);
 *                  TEST_LOOPS(m) defaults to 1000 ("_cus")
 *
 * Every macro body and every use of the argument is parenthesized so the
 * macros stay correct when passed an expression such as TEST_LOOPS(k + p).
 */
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES 32
# define TEST_LEN(m) ((128*1024 / (m)) & ~(64-1))
# define TEST_LOOPS(m) (10000*(m))
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test. Pull from large mem base.
# define TEST_SOURCES 32
# define GT_L3_CACHE (32*1024*1024)	/* some number > last level cache */
# define TEST_LEN(m) ((GT_L3_CACHE / (m)) & ~(64-1))
# define TEST_LOOPS(m) (50*(m))
# define TEST_TYPE_STR "_cold"
# else
# define TEST_TYPE_STR "_cus"
# ifndef TEST_LOOPS
# define TEST_LOOPS(m) 1000
# endif
# endif
#endif
|
||||
|
||||
#define MMAX TEST_SOURCES
|
||||
#define KMAX TEST_SOURCES
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, rtest, m, k, nerrs, r;
|
||||
void *buf;
|
||||
u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
|
||||
u8 a[MMAX * KMAX], b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
|
||||
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||
struct perf start, stop;
|
||||
|
||||
// Pick test parameters
|
||||
m = 14;
|
||||
k = 10;
|
||||
nerrs = 4;
|
||||
const u8 err_list[] = {2, 4, 5, 7};
|
||||
|
||||
printf("erasure_code_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
|
||||
|
||||
if (m > MMAX || k > KMAX || nerrs > (m - k)){
|
||||
printf(" Input test parameter error\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
memcpy(src_err_list, err_list, nerrs);
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0; i < nerrs; i++)
|
||||
src_in_err[src_err_list[i]] = 1;
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < m; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("alloc error: Fail\n");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
for (i = 0; i < (m - k); i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("alloc error: Fail\n");
|
||||
return -1;
|
||||
}
|
||||
temp_buffs[i] = buf;
|
||||
}
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN(m); j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||
ec_encode_data(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
|
||||
// Start encode test
|
||||
perf_start(&start);
|
||||
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
|
||||
// Make parity vects
|
||||
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||
ec_encode_data(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf("erasure_code_encode" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)(TEST_LEN(m)) * (m) * rtest);
|
||||
|
||||
// Start decode test
|
||||
perf_start(&start);
|
||||
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
|
||||
// Construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r])
|
||||
r++;
|
||||
recov[i] = buffs[r];
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
|
||||
if (gf_invert_matrix(b, d, k) < 0) {
|
||||
printf("BAD MATRIX\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < nerrs; i++)
|
||||
for (j = 0; j < k; j++)
|
||||
c[k * i + j] = d[k * src_err_list[i] + j];
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, c, g_tbls);
|
||||
ec_encode_data(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
printf("erasure_code_decode" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)(TEST_LEN(m)) * (k + nerrs) * rtest);
|
||||
|
||||
printf("done all: Pass\n");
|
||||
return 0;
|
||||
}
|
168
erasure/src/erasure-code-sse-perf.c
Normal file
168
erasure/src/erasure-code-sse-perf.c
Normal file
@ -0,0 +1,168 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/tests.h"
|
||||
|
||||
/*
 * Benchmark configuration.
 *
 * Define CACHED_TEST to loop many times over a small ("warm") data set.
 * Otherwise a large ("cold") data set is used, unless TEST_CUSTOM is defined,
 * in which case the build must supply TEST_SOURCES and TEST_LEN itself
 * (TEST_LOOPS falls back to 1000 when not supplied).
 *
 * Macro arguments and expansions are fully parenthesized so that expression
 * arguments such as TEST_LEN(a + b) expand with the intended precedence.
 */
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES 32
# define TEST_LEN(m)  (((128*1024) / (m)) & ~(64-1))
# define TEST_LOOPS(m)   (10000*(m))
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES 32
#  define GT_L3_CACHE  (32*1024*1024)	/* some number > last level cache */
#  define TEST_LEN(m)  ((GT_L3_CACHE / (m)) & ~(64-1))
#  define TEST_LOOPS(m)   (50*(m))
#  define TEST_TYPE_STR "_cold"
# else
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#    define TEST_LOOPS(m) 1000
#  endif
# endif
#endif

// Upper bounds used to size the matrices and buffer tables in main()
#define MMAX TEST_SOURCES
#define KMAX TEST_SOURCES

typedef unsigned char u8;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, rtest, m, k, nerrs, r;
|
||||
void *buf;
|
||||
u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
|
||||
u8 a[MMAX * KMAX], b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
|
||||
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||
struct perf start, stop;
|
||||
|
||||
// Pick test parameters
|
||||
m = 14;
|
||||
k = 10;
|
||||
nerrs = 4;
|
||||
const u8 err_list[] = {2, 4, 5, 7};
|
||||
|
||||
printf("erasure_code_sse_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
|
||||
|
||||
if (m > MMAX || k > KMAX || nerrs > (m - k)){
|
||||
printf(" Input test parameter error\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
memcpy(src_err_list, err_list, nerrs);
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0; i < nerrs; i++)
|
||||
src_in_err[src_err_list[i]] = 1;
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < m; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("alloc error: Fail\n");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
for (i = 0; i < (m - k); i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||
printf("alloc error: Fail\n");
|
||||
return -1;
|
||||
}
|
||||
temp_buffs[i] = buf;
|
||||
}
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN(m); j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||
ec_encode_data_sse(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
|
||||
// Start encode test
|
||||
perf_start(&start);
|
||||
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
|
||||
// Make parity vects
|
||||
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||
ec_encode_data_sse(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf("erasure_code_sse_encode" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)(TEST_LEN(m)) * (m) * rtest);
|
||||
|
||||
// Start decode test
|
||||
perf_start(&start);
|
||||
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
|
||||
// Construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r])
|
||||
r++;
|
||||
recov[i] = buffs[r];
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
|
||||
if (gf_invert_matrix(b, d, k) < 0) {
|
||||
printf("BAD MATRIX\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < nerrs; i++)
|
||||
for (j = 0; j < k; j++)
|
||||
c[k * i + j] = d[k * src_err_list[i] + j];
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, c, g_tbls);
|
||||
ec_encode_data_sse(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
printf("erasure_code_sse_decode" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)(TEST_LEN(m)) * (k + nerrs) * rtest);
|
||||
|
||||
printf("done all: Pass\n");
|
||||
return 0;
|
||||
}
|
764
erasure/src/erasure-code-sse-test.c
Normal file
764
erasure/src/erasure-code-sse-test.c
Normal file
@ -0,0 +1,764 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/types.h"
|
||||
|
||||
/* Size in bytes of every test buffer, and the largest size used by the
 * end-of-buffer (Electric Fence) sweep. */
#define TEST_LEN 8192
#define TEST_SIZE (TEST_LEN/2)

/* Build-time overridable knobs: number of buffers and random iterations. */
#if !defined(TEST_SOURCES)
# define TEST_SOURCES 127
#endif
#if !defined(RANDOMS)
# define RANDOMS 200
#endif

/* Upper bounds for m and k; both follow TEST_SOURCES. */
#define MMAX TEST_SOURCES
#define KMAX TEST_SOURCES

/* Smallest size used by the end-of-buffer sweep. */
#define EFENCE_TEST_MIN_SIZE 16

/* Power of 2 range used when checking ptr and len alignment.
 * A value of 0 restricts the checks to aligned cases only. */
#if defined(EC_ALIGNED_ADDR)
# define PTR_ALIGN_CHK_B 0
# define LEN_ALIGN_CHK_B 0
#else
# define PTR_ALIGN_CHK_B 32
# define LEN_ALIGN_CHK_B 32
#endif

/* Seed handed to srand() so runs are repeatable. */
#if !defined(TEST_SEED)
#define TEST_SEED 11
#endif

typedef unsigned char u8;
|
||||
|
||||
/*
 * Print the first len bytes of buf to stdout in hex, breaking the line after
 * every 32 bytes and ending with one extra newline.
 */
void dump(unsigned char *buf, int len)
{
	int count;

	// count is the number of bytes printed so far, including the current one
	for (count = 1; count <= len; count++) {
		printf(" %2x", 0xff & buf[count - 1]);
		if (count % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Print a k-row table of buffers to stdout in hex: the first m bytes of each
 * of s[0] .. s[k-1], one buffer per line, followed by a blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Print a flat byte array to stdout as a k-row by m-column hex table
 * (row r occupies s[r * m] .. s[r * m + m - 1]), followed by a blank line.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row, col;
	int pos = 0;		// running index, equal to col + row * m

	for (row = 0; row < k; row++) {
		for (col = 0; col < m; col++, pos++)
			printf(" %2x", 0xff & s[pos]);
		printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
// Generate Random errors
|
||||
static void gen_err_list(unsigned char *src_err_list,
|
||||
unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m)
|
||||
{
|
||||
int i, err;
|
||||
int nerrs = 0, nsrcerrs = 0;
|
||||
|
||||
for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err) {
|
||||
src_err_list[nerrs++] = i;
|
||||
if (i < k) {
|
||||
nsrcerrs++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (nerrs == 0) { // should have at least one error
|
||||
while ((err = (rand() % KMAX)) >= m) ;
|
||||
src_err_list[nerrs++] = err;
|
||||
src_in_err[err] = 1;
|
||||
if (err < k)
|
||||
nsrcerrs = 1;
|
||||
}
|
||||
*pnerrs = nerrs;
|
||||
*pnsrcerrs = nsrcerrs;
|
||||
return;
|
||||
}
|
||||
|
||||
#define NO_INVERT_MATRIX -2
|
||||
// Generate decode matrix from encode matrix
|
||||
static int gf_gen_decode_matrix(unsigned char *encode_matrix,
|
||||
unsigned char *decode_matrix,
|
||||
unsigned char *invert_matrix,
|
||||
unsigned int *decode_index,
|
||||
unsigned char *src_err_list,
|
||||
unsigned char *src_in_err,
|
||||
int nerrs, int nsrcerrs, int k, int m)
|
||||
{
|
||||
int i, j, p;
|
||||
int r;
|
||||
unsigned char *backup, *b, s;
|
||||
int incr = 0;
|
||||
|
||||
b = malloc(MMAX * KMAX);
|
||||
backup = malloc(MMAX * KMAX);
|
||||
|
||||
if (b == NULL || backup == NULL) {
|
||||
printf("Test failure! Error with malloc\n");
|
||||
free(b);
|
||||
free(backup);
|
||||
return -1;
|
||||
}
|
||||
// Construct matrix b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r])
|
||||
r++;
|
||||
for (j = 0; j < k; j++) {
|
||||
b[k * i + j] = encode_matrix[k * r + j];
|
||||
backup[k * i + j] = encode_matrix[k * r + j];
|
||||
}
|
||||
decode_index[i] = r;
|
||||
}
|
||||
incr = 0;
|
||||
while (gf_invert_matrix(b, invert_matrix, k) < 0) {
|
||||
if (nerrs == (m - k)) {
|
||||
free(b);
|
||||
free(backup);
|
||||
printf("BAD MATRIX\n");
|
||||
return NO_INVERT_MATRIX;
|
||||
}
|
||||
incr++;
|
||||
memcpy(b, backup, MMAX * KMAX);
|
||||
for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) {
|
||||
if (src_err_list[i] == (decode_index[k - 1] + incr)) {
|
||||
// skip the erased parity line
|
||||
incr++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (decode_index[k - 1] + incr >= m) {
|
||||
free(b);
|
||||
free(backup);
|
||||
printf("BAD MATRIX\n");
|
||||
return NO_INVERT_MATRIX;
|
||||
}
|
||||
decode_index[k - 1] += incr;
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j];
|
||||
|
||||
};
|
||||
|
||||
for (i = 0; i < nsrcerrs; i++) {
|
||||
for (j = 0; j < k; j++) {
|
||||
decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j];
|
||||
}
|
||||
}
|
||||
/* src_err_list from encode_matrix * invert of b for parity decoding */
|
||||
for (p = nsrcerrs; p < nerrs; p++) {
|
||||
for (i = 0; i < k; i++) {
|
||||
s = 0;
|
||||
for (j = 0; j < k; j++)
|
||||
s ^= gf_mul(invert_matrix[j * k + i],
|
||||
encode_matrix[k * src_err_list[p] + j]);
|
||||
|
||||
decode_matrix[k * p + i] = s;
|
||||
}
|
||||
}
|
||||
free(b);
|
||||
free(backup);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int re = 0;
|
||||
int i, j, p, rtest, m, k;
|
||||
int nerrs, nsrcerrs;
|
||||
void *buf;
|
||||
unsigned int decode_index[MMAX];
|
||||
unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
|
||||
unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls;
|
||||
unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES];
|
||||
unsigned char *recov[TEST_SOURCES];
|
||||
|
||||
int rows, align, size;
|
||||
unsigned char *efence_buffs[TEST_SOURCES];
|
||||
unsigned int offset;
|
||||
u8 *ubuffs[TEST_SOURCES];
|
||||
u8 *temp_ubuffs[TEST_SOURCES];
|
||||
|
||||
printf("erasure_code_sse_test: %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||
srand(TEST_SEED);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
temp_buffs[i] = buf;
|
||||
}
|
||||
|
||||
// Test erasure code by encode and recovery
|
||||
|
||||
encode_matrix = malloc(MMAX * KMAX);
|
||||
decode_matrix = malloc(MMAX * KMAX);
|
||||
invert_matrix = malloc(MMAX * KMAX);
|
||||
g_tbls = malloc(KMAX * TEST_SOURCES * 32);
|
||||
if (encode_matrix == NULL || decode_matrix == NULL
|
||||
|| invert_matrix == NULL || g_tbls == NULL) {
|
||||
printf("Test failure! Error with malloc\n");
|
||||
return -1;
|
||||
}
|
||||
// Pick a first test
|
||||
m = 9;
|
||||
k = 5;
|
||||
if (m > MMAX || k > KMAX)
|
||||
return -1;
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// Generate encode matrix encode_matrix
|
||||
// The matrix generated by gf_gen_rs_matrix
|
||||
// is not always invertable.
|
||||
gf_gen_rs_matrix(encode_matrix, m, k);
|
||||
|
||||
// Generate g_tbls from encode matrix encode_matrix
|
||||
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||
|
||||
// Perform matrix dot_prod for EC encoding
|
||||
// using g_tbls from encode matrix encode_matrix
|
||||
ec_encode_data_sse(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
|
||||
// Choose random buffers to be in erasure
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||
|
||||
// Generate decode matrix
|
||||
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||
invert_matrix, decode_index, src_err_list, src_in_err,
|
||||
nerrs, nsrcerrs, k, m);
|
||||
if (re != 0) {
|
||||
printf("Fail to gf_gen_decode_matrix\n");
|
||||
return -1;
|
||||
}
|
||||
// Pack recovery array as list of valid sources
|
||||
// Its order must be the same as the order
|
||||
// to generate matrix b in gf_gen_decode_matrix
|
||||
for (i = 0; i < k; i++) {
|
||||
recov[i] = buffs[decode_index[i]];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||
ec_encode_data_sse(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (j = 0; j < nerrs; j++)
|
||||
printf(" %d", src_err_list[j]);
|
||||
printf(" - Index = ");
|
||||
for (p = 0; p < k; p++)
|
||||
printf(" %d", decode_index[p]);
|
||||
printf("\nencode_matrix:\n");
|
||||
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||
printf("\ndecode_matrix:\n");
|
||||
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buffs[k + i], 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Pick a first test
|
||||
m = 9;
|
||||
k = 5;
|
||||
if (m > MMAX || k > KMAX)
|
||||
return -1;
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// The matrix generated by gf_gen_cauchy1_matrix
|
||||
// is always invertable.
|
||||
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||
|
||||
// Generate g_tbls from encode matrix encode_matrix
|
||||
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||
|
||||
// Perform matrix dot_prod for EC encoding
|
||||
// using g_tbls from encode matrix encode_matrix
|
||||
ec_encode_data_sse(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
|
||||
// Choose random buffers to be in erasure
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||
|
||||
// Generate decode matrix
|
||||
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||
invert_matrix, decode_index, src_err_list, src_in_err,
|
||||
nerrs, nsrcerrs, k, m);
|
||||
if (re != 0) {
|
||||
printf("Fail to gf_gen_decode_matrix\n");
|
||||
return -1;
|
||||
}
|
||||
// Pack recovery array as list of valid sources
|
||||
// Its order must be the same as the order
|
||||
// to generate matrix b in gf_gen_decode_matrix
|
||||
for (i = 0; i < k; i++) {
|
||||
recov[i] = buffs[decode_index[i]];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||
ec_encode_data_sse(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (j = 0; j < nerrs; j++)
|
||||
printf(" %d", src_err_list[j]);
|
||||
printf(" - Index = ");
|
||||
for (p = 0; p < k; p++)
|
||||
printf(" %d", decode_index[p]);
|
||||
printf("\nencode_matrix:\n");
|
||||
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||
printf("\ndecode_matrix:\n");
|
||||
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buffs[k + i], 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Do more random tests
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
while ((m = (rand() % MMAX)) < 2) ;
|
||||
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||
|
||||
if (m > MMAX || k > KMAX)
|
||||
continue;
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// The matrix generated by gf_gen_cauchy1_matrix
|
||||
// is always invertable.
|
||||
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||
|
||||
// Make parity vects
|
||||
// Generate g_tbls from encode matrix a
|
||||
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||
// Perform matrix dot_prod for EC encoding
|
||||
// using g_tbls from encode matrix a
|
||||
ec_encode_data_sse(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||
|
||||
// Generate decode matrix
|
||||
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||
invert_matrix, decode_index, src_err_list,
|
||||
src_in_err, nerrs, nsrcerrs, k, m);
|
||||
if (re != 0) {
|
||||
printf("Fail to gf_gen_decode_matrix\n");
|
||||
return -1;
|
||||
}
|
||||
// Pack recovery array as list of valid sources
|
||||
// Its order must be the same as the order
|
||||
// to generate matrix b in gf_gen_decode_matrix
|
||||
for (i = 0; i < k; i++) {
|
||||
recov[i] = buffs[decode_index[i]];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||
ec_encode_data_sse(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (j = 0; j < nerrs; j++)
|
||||
printf(" %d", src_err_list[j]);
|
||||
printf(" - Index = ");
|
||||
for (p = 0; p < k; p++)
|
||||
printf(" %d", decode_index[p]);
|
||||
printf("\nencode_matrix:\n");
|
||||
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||
printf("\ndecode_matrix:\n");
|
||||
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||
printf("orig data:\n");
|
||||
dump_matrix(buffs, m, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buffs[k + i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
k = 16;
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
if (k > KMAX)
|
||||
return -1;
|
||||
|
||||
for (rows = 1; rows <= 16; rows++) {
|
||||
m = k + rows;
|
||||
if (m > MMAX)
|
||||
return -1;
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (size = EFENCE_TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||
for (i = 0; i < m; i++) { // Line up TEST_SIZE from end
|
||||
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||
}
|
||||
|
||||
// The matrix generated by gf_gen_cauchy1_matrix
|
||||
// is always invertable.
|
||||
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||
|
||||
// Make parity vects
|
||||
// Generate g_tbls from encode matrix a
|
||||
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||
// Perform matrix dot_prod for EC encoding
|
||||
// using g_tbls from encode matrix a
|
||||
ec_encode_data_sse(size, k, m - k, g_tbls, efence_buffs,
|
||||
&efence_buffs[k]);
|
||||
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||
|
||||
// Generate decode matrix
|
||||
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||
invert_matrix, decode_index, src_err_list,
|
||||
src_in_err, nerrs, nsrcerrs, k, m);
|
||||
if (re != 0) {
|
||||
printf("Fail to gf_gen_decode_matrix\n");
|
||||
return -1;
|
||||
}
|
||||
// Pack recovery array as list of valid sources
|
||||
// Its order must be the same as the order
|
||||
// to generate matrix b in gf_gen_decode_matrix
|
||||
for (i = 0; i < k; i++) {
|
||||
recov[i] = efence_buffs[decode_index[i]];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||
ec_encode_data_sse(size, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
if (0 !=
|
||||
memcmp(temp_buffs[k + i], efence_buffs[src_err_list[i]],
|
||||
size)) {
|
||||
printf("Efence: Fail error recovery (%d, %d, %d)\n", m,
|
||||
k, nerrs);
|
||||
|
||||
printf("size = %d\n", size);
|
||||
|
||||
printf("Test erase list = ");
|
||||
for (j = 0; j < nerrs; j++)
|
||||
printf(" %d", src_err_list[j]);
|
||||
printf(" - Index = ");
|
||||
for (p = 0; p < k; p++)
|
||||
printf(" %d", decode_index[p]);
|
||||
printf("\nencode_matrix:\n");
|
||||
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||
printf("\ndecode_matrix:\n");
|
||||
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buffs[k + i], align);
|
||||
printf("orig :");
|
||||
dump(efence_buffs[src_err_list[i]], align);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Test rand ptr alignment if available
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
while ((m = (rand() % MMAX)) < 2) ;
|
||||
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||
|
||||
if (m > MMAX || k > KMAX)
|
||||
continue;
|
||||
|
||||
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15;
|
||||
|
||||
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||
// Add random offsets
|
||||
for (i = 0; i < m; i++) {
|
||||
memset(buffs[i], 0, TEST_LEN); // zero pad to check write-over
|
||||
memset(temp_buffs[i], 0, TEST_LEN); // zero pad to check write-over
|
||||
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
temp_ubuffs[i] = temp_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
}
|
||||
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
ubuffs[i][j] = rand();
|
||||
|
||||
// The matrix generated by gf_gen_cauchy1_matrix
|
||||
// is always invertable.
|
||||
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||
|
||||
// Make parity vects
|
||||
// Generate g_tbls from encode matrix a
|
||||
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||
// Perform matrix dot_prod for EC encoding
|
||||
// using g_tbls from encode matrix a
|
||||
ec_encode_data_sse(size, k, m - k, g_tbls, ubuffs, &ubuffs[k]);
|
||||
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||
|
||||
// Generate decode matrix
|
||||
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||
invert_matrix, decode_index, src_err_list,
|
||||
src_in_err, nerrs, nsrcerrs, k, m);
|
||||
if (re != 0) {
|
||||
printf("Fail to gf_gen_decode_matrix\n");
|
||||
return -1;
|
||||
}
|
||||
// Pack recovery array as list of valid sources
|
||||
// Its order must be the same as the order
|
||||
// to generate matrix b in gf_gen_decode_matrix
|
||||
for (i = 0; i < k; i++) {
|
||||
recov[i] = ubuffs[decode_index[i]];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||
ec_encode_data_sse(size, k, nerrs, g_tbls, recov, &temp_ubuffs[k]);
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
if (0 != memcmp(temp_ubuffs[k + i], ubuffs[src_err_list[i]], size)) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (j = 0; j < nerrs; j++)
|
||||
printf(" %d", src_err_list[j]);
|
||||
printf(" - Index = ");
|
||||
for (p = 0; p < k; p++)
|
||||
printf(" %d", decode_index[p]);
|
||||
printf("\nencode_matrix:\n");
|
||||
dump_u8xu8((unsigned char *)encode_matrix, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((unsigned char *)invert_matrix, k, k);
|
||||
printf("\ndecode_matrix:\n");
|
||||
dump_u8xu8((unsigned char *)decode_matrix, m, k);
|
||||
printf("orig data:\n");
|
||||
dump_matrix(ubuffs, m, 25);
|
||||
printf("orig :");
|
||||
dump(ubuffs[src_err_list[i]], 25);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_ubuffs[k + i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Confirm that padding around dests is unchanged
|
||||
memset(temp_buffs[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||
|
||||
for (i = 0; i < m; i++) {
|
||||
|
||||
offset = ubuffs[i] - buffs[i];
|
||||
|
||||
if (memcmp(buffs[i], temp_buffs[0], offset)) {
|
||||
printf("Fail rand ualign encode pad start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp
|
||||
(buffs[i] + offset + size, temp_buffs[0],
|
||||
PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign encode pad end\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
offset = temp_ubuffs[k + i] - temp_buffs[k + i];
|
||||
if (memcmp(temp_buffs[k + i], temp_buffs[0], offset)) {
|
||||
printf("Fail rand ualign decode pad start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp
|
||||
(temp_buffs[k + i] + offset + size, temp_buffs[0],
|
||||
PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign decode pad end\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test size alignment
|
||||
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 13 : 16;
|
||||
|
||||
for (size = TEST_LEN; size > 0; size -= align) {
|
||||
while ((m = (rand() % MMAX)) < 2) ;
|
||||
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||
|
||||
if (m > MMAX || k > KMAX)
|
||||
continue;
|
||||
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// The matrix generated by gf_gen_cauchy1_matrix
|
||||
// is always invertable.
|
||||
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||
|
||||
// Make parity vects
|
||||
// Generate g_tbls from encode matrix a
|
||||
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||
// Perform matrix dot_prod for EC encoding
|
||||
// using g_tbls from encode matrix a
|
||||
ec_encode_data_sse(size, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||
// Generate decode matrix
|
||||
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||
invert_matrix, decode_index, src_err_list,
|
||||
src_in_err, nerrs, nsrcerrs, k, m);
|
||||
if (re != 0) {
|
||||
printf("Fail to gf_gen_decode_matrix\n");
|
||||
return -1;
|
||||
}
|
||||
// Pack recovery array as list of valid sources
|
||||
// Its order must be the same as the order
|
||||
// to generate matrix b in gf_gen_decode_matrix
|
||||
for (i = 0; i < k; i++) {
|
||||
recov[i] = buffs[decode_index[i]];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||
ec_encode_data_sse(size, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], size)) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (j = 0; j < nerrs; j++)
|
||||
printf(" %d", src_err_list[j]);
|
||||
printf(" - Index = ");
|
||||
for (p = 0; p < k; p++)
|
||||
printf(" %d", decode_index[p]);
|
||||
printf("\nencode_matrix:\n");
|
||||
dump_u8xu8((unsigned char *)encode_matrix, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((unsigned char *)invert_matrix, k, k);
|
||||
printf("\ndecode_matrix:\n");
|
||||
dump_u8xu8((unsigned char *)decode_matrix, m, k);
|
||||
printf("orig data:\n");
|
||||
dump_matrix(buffs, m, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buffs[k + i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
printf("done EC tests: Pass\n");
|
||||
return 0;
|
||||
}
|
763
erasure/src/erasure-code-test.c
Normal file
763
erasure/src/erasure-code-test.c
Normal file
@ -0,0 +1,763 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/types.h"
|
||||
|
||||
// Size in bytes of every test buffer.
#define TEST_LEN 8192
// Upper bound on the block size used by the Electric Fence pass.
#define TEST_SIZE (TEST_LEN / 2)

// Number of buffers allocated per buffer set (build-time overridable).
#ifndef TEST_SOURCES
#define TEST_SOURCES 127
#endif

// Iteration count of each randomised pass (build-time overridable).
#ifndef RANDOMS
#define RANDOMS 200
#endif

// Exclusive upper bounds for the randomly drawn m (total blocks) and
// k (source blocks); they also size the matrix allocations.
#define MMAX TEST_SOURCES
#define KMAX TEST_SOURCES

// Smallest block size used by the Electric Fence pass.
#define EFENCE_TEST_MIN_SIZE 16

// Power-of-2 ranges used to vary pointer and length alignment.
// A value of 0 restricts the tests to aligned pointers/lengths only.
#ifdef EC_ALIGNED_ADDR
#define PTR_ALIGN_CHK_B 0
#define LEN_ALIGN_CHK_B 0
#else
#define PTR_ALIGN_CHK_B 32
#define LEN_ALIGN_CHK_B 32
#endif

// Seed handed to srand() so that runs are reproducible (build-time overridable).
#ifndef TEST_SEED
#define TEST_SEED 11
#endif

// Shorthand for a raw byte.
typedef unsigned char u8;
|
||||
|
||||
/*
 * Print the first len bytes of buf to stdout as space-separated hex values.
 * A line break is emitted after every 32nd byte and once more at the end.
 */
void dump(unsigned char *buf, int len)
{
	int pos = 0;

	while (pos < len) {
		printf(" %2x", buf[pos] & 0xff);
		pos++;
		// wrap the output every 32 bytes
		if ((pos % 32) == 0)
			putchar('\n');
	}
	putchar('\n');
}
|
||||
|
||||
/*
 * Print an array of k row pointers to stdout, showing the first m bytes of
 * each row as hex on its own line, followed by one blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		putchar('\n');
		row++;
	}
	putchar('\n');
}
|
||||
|
||||
/*
 * Print a k-row by m-column byte matrix stored contiguously in row-major
 * order: one line of hex values per row, followed by one blank line.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		// start of the current row inside the flat array
		const unsigned char *line = s + (row * m);

		for (col = 0; col < m; col++)
			printf(" %2x", line[col] & 0xff);
		putchar('\n');
	}
	putchar('\n');
}
|
||||
|
||||
// Generate Random errors
|
||||
static void gen_err_list(unsigned char *src_err_list,
|
||||
unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m)
|
||||
{
|
||||
int i, err;
|
||||
int nerrs = 0, nsrcerrs = 0;
|
||||
|
||||
for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err) {
|
||||
src_err_list[nerrs++] = i;
|
||||
if (i < k) {
|
||||
nsrcerrs++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (nerrs == 0) { // should have at least one error
|
||||
while ((err = (rand() % KMAX)) >= m) ;
|
||||
src_err_list[nerrs++] = err;
|
||||
src_in_err[err] = 1;
|
||||
if (err < k)
|
||||
nsrcerrs = 1;
|
||||
}
|
||||
*pnerrs = nerrs;
|
||||
*pnsrcerrs = nsrcerrs;
|
||||
return;
|
||||
}
|
||||
|
||||
#define NO_INVERT_MATRIX -2
|
||||
// Generate decode matrix from encode matrix
|
||||
static int gf_gen_decode_matrix(unsigned char *encode_matrix,
|
||||
unsigned char *decode_matrix,
|
||||
unsigned char *invert_matrix,
|
||||
unsigned int *decode_index,
|
||||
unsigned char *src_err_list,
|
||||
unsigned char *src_in_err,
|
||||
int nerrs, int nsrcerrs, int k, int m)
|
||||
{
|
||||
int i, j, p;
|
||||
int r;
|
||||
unsigned char *backup, *b, s;
|
||||
int incr = 0;
|
||||
|
||||
b = malloc(MMAX * KMAX);
|
||||
backup = malloc(MMAX * KMAX);
|
||||
|
||||
if (b == NULL || backup == NULL) {
|
||||
printf("Test failure! Error with malloc\n");
|
||||
free(b);
|
||||
free(backup);
|
||||
return -1;
|
||||
}
|
||||
// Construct matrix b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r])
|
||||
r++;
|
||||
for (j = 0; j < k; j++) {
|
||||
b[k * i + j] = encode_matrix[k * r + j];
|
||||
backup[k * i + j] = encode_matrix[k * r + j];
|
||||
}
|
||||
decode_index[i] = r;
|
||||
}
|
||||
incr = 0;
|
||||
while (gf_invert_matrix(b, invert_matrix, k) < 0) {
|
||||
if (nerrs == (m - k)) {
|
||||
free(b);
|
||||
free(backup);
|
||||
printf("BAD MATRIX\n");
|
||||
return NO_INVERT_MATRIX;
|
||||
}
|
||||
incr++;
|
||||
memcpy(b, backup, MMAX * KMAX);
|
||||
for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) {
|
||||
if (src_err_list[i] == (decode_index[k - 1] + incr)) {
|
||||
// skip the erased parity line
|
||||
incr++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (decode_index[k - 1] + incr >= m) {
|
||||
free(b);
|
||||
free(backup);
|
||||
printf("BAD MATRIX\n");
|
||||
return NO_INVERT_MATRIX;
|
||||
}
|
||||
decode_index[k - 1] += incr;
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j];
|
||||
|
||||
};
|
||||
|
||||
for (i = 0; i < nsrcerrs; i++) {
|
||||
for (j = 0; j < k; j++) {
|
||||
decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j];
|
||||
}
|
||||
}
|
||||
/* src_err_list from encode_matrix * invert of b for parity decoding */
|
||||
for (p = nsrcerrs; p < nerrs; p++) {
|
||||
for (i = 0; i < k; i++) {
|
||||
s = 0;
|
||||
for (j = 0; j < k; j++)
|
||||
s ^= gf_mul(invert_matrix[j * k + i],
|
||||
encode_matrix[k * src_err_list[p] + j]);
|
||||
|
||||
decode_matrix[k * p + i] = s;
|
||||
}
|
||||
}
|
||||
free(b);
|
||||
free(backup);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int re = 0;
|
||||
int i, j, p, rtest, m, k;
|
||||
int nerrs, nsrcerrs;
|
||||
void *buf;
|
||||
unsigned int decode_index[MMAX];
|
||||
unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
|
||||
unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls;
|
||||
unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES];
|
||||
unsigned char *recov[TEST_SOURCES];
|
||||
|
||||
int rows, align, size;
|
||||
unsigned char *efence_buffs[TEST_SOURCES];
|
||||
unsigned int offset;
|
||||
u8 *ubuffs[TEST_SOURCES];
|
||||
u8 *temp_ubuffs[TEST_SOURCES];
|
||||
|
||||
printf("erasure_code_test: %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||
srand(TEST_SEED);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
temp_buffs[i] = buf;
|
||||
}
|
||||
|
||||
// Test erasure code by encode and recovery
|
||||
|
||||
encode_matrix = malloc(MMAX * KMAX);
|
||||
decode_matrix = malloc(MMAX * KMAX);
|
||||
invert_matrix = malloc(MMAX * KMAX);
|
||||
g_tbls = malloc(KMAX * TEST_SOURCES * 32);
|
||||
if (encode_matrix == NULL || decode_matrix == NULL
|
||||
|| invert_matrix == NULL || g_tbls == NULL) {
|
||||
printf("Test failure! Error with malloc\n");
|
||||
return -1;
|
||||
}
|
||||
// Pick a first test
|
||||
m = 9;
|
||||
k = 5;
|
||||
if (m > MMAX || k > KMAX)
|
||||
return -1;
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// Generate encode matrix encode_matrix
|
||||
// The matrix generated by gf_gen_rs_matrix
|
||||
// is not always invertable.
|
||||
gf_gen_rs_matrix(encode_matrix, m, k);
|
||||
|
||||
// Generate g_tbls from encode matrix encode_matrix
|
||||
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||
|
||||
// Perform matrix dot_prod for EC encoding
|
||||
// using g_tbls from encode matrix encode_matrix
|
||||
ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
|
||||
// Choose random buffers to be in erasure
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||
|
||||
// Generate decode matrix
|
||||
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||
invert_matrix, decode_index, src_err_list, src_in_err,
|
||||
nerrs, nsrcerrs, k, m);
|
||||
if (re != 0) {
|
||||
printf("Fail to gf_gen_decode_matrix\n");
|
||||
return -1;
|
||||
}
|
||||
// Pack recovery array as list of valid sources
|
||||
// Its order must be the same as the order
|
||||
// to generate matrix b in gf_gen_decode_matrix
|
||||
for (i = 0; i < k; i++) {
|
||||
recov[i] = buffs[decode_index[i]];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||
ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (j = 0; j < nerrs; j++)
|
||||
printf(" %d", src_err_list[j]);
|
||||
printf(" - Index = ");
|
||||
for (p = 0; p < k; p++)
|
||||
printf(" %d", decode_index[p]);
|
||||
printf("\nencode_matrix:\n");
|
||||
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||
printf("\ndecode_matrix:\n");
|
||||
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buffs[k + i], 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Pick a first test
|
||||
m = 9;
|
||||
k = 5;
|
||||
if (m > MMAX || k > KMAX)
|
||||
return -1;
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// The matrix generated by gf_gen_cauchy1_matrix
|
||||
// is always invertable.
|
||||
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||
|
||||
// Generate g_tbls from encode matrix encode_matrix
|
||||
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||
|
||||
// Perform matrix dot_prod for EC encoding
|
||||
// using g_tbls from encode matrix encode_matrix
|
||||
ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
|
||||
// Choose random buffers to be in erasure
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||
|
||||
// Generate decode matrix
|
||||
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||
invert_matrix, decode_index, src_err_list, src_in_err,
|
||||
nerrs, nsrcerrs, k, m);
|
||||
if (re != 0) {
|
||||
printf("Fail to gf_gen_decode_matrix\n");
|
||||
return -1;
|
||||
}
|
||||
// Pack recovery array as list of valid sources
|
||||
// Its order must be the same as the order
|
||||
// to generate matrix b in gf_gen_decode_matrix
|
||||
for (i = 0; i < k; i++) {
|
||||
recov[i] = buffs[decode_index[i]];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||
ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (j = 0; j < nerrs; j++)
|
||||
printf(" %d", src_err_list[j]);
|
||||
printf(" - Index = ");
|
||||
for (p = 0; p < k; p++)
|
||||
printf(" %d", decode_index[p]);
|
||||
printf("\nencode_matrix:\n");
|
||||
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||
printf("\ndecode_matrix:\n");
|
||||
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buffs[k + i], 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Do more random tests
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
while ((m = (rand() % MMAX)) < 2) ;
|
||||
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||
|
||||
if (m > MMAX || k > KMAX)
|
||||
continue;
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// The matrix generated by gf_gen_cauchy1_matrix
|
||||
// is always invertable.
|
||||
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||
|
||||
// Make parity vects
|
||||
// Generate g_tbls from encode matrix a
|
||||
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||
// Perform matrix dot_prod for EC encoding
|
||||
// using g_tbls from encode matrix a
|
||||
ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||
|
||||
// Generate decode matrix
|
||||
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||
invert_matrix, decode_index, src_err_list,
|
||||
src_in_err, nerrs, nsrcerrs, k, m);
|
||||
if (re != 0) {
|
||||
printf("Fail to gf_gen_decode_matrix\n");
|
||||
return -1;
|
||||
}
|
||||
// Pack recovery array as list of valid sources
|
||||
// Its order must be the same as the order
|
||||
// to generate matrix b in gf_gen_decode_matrix
|
||||
for (i = 0; i < k; i++) {
|
||||
recov[i] = buffs[decode_index[i]];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||
ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (j = 0; j < nerrs; j++)
|
||||
printf(" %d", src_err_list[j]);
|
||||
printf(" - Index = ");
|
||||
for (p = 0; p < k; p++)
|
||||
printf(" %d", decode_index[p]);
|
||||
printf("\nencode_matrix:\n");
|
||||
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||
printf("\ndecode_matrix:\n");
|
||||
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||
printf("orig data:\n");
|
||||
dump_matrix(buffs, m, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buffs[k + i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
k = 16;
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
if (k > KMAX)
|
||||
return -1;
|
||||
|
||||
for (rows = 1; rows <= 16; rows++) {
|
||||
m = k + rows;
|
||||
if (m > MMAX)
|
||||
return -1;
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (size = EFENCE_TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||
for (i = 0; i < m; i++) { // Line up TEST_SIZE from end
|
||||
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||
}
|
||||
|
||||
// The matrix generated by gf_gen_cauchy1_matrix
|
||||
// is always invertable.
|
||||
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||
|
||||
// Make parity vects
|
||||
// Generate g_tbls from encode matrix a
|
||||
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||
// Perform matrix dot_prod for EC encoding
|
||||
// using g_tbls from encode matrix a
|
||||
ec_encode_data(size, k, m - k, g_tbls, efence_buffs, &efence_buffs[k]);
|
||||
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||
|
||||
// Generate decode matrix
|
||||
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||
invert_matrix, decode_index, src_err_list,
|
||||
src_in_err, nerrs, nsrcerrs, k, m);
|
||||
if (re != 0) {
|
||||
printf("Fail to gf_gen_decode_matrix\n");
|
||||
return -1;
|
||||
}
|
||||
// Pack recovery array as list of valid sources
|
||||
// Its order must be the same as the order
|
||||
// to generate matrix b in gf_gen_decode_matrix
|
||||
for (i = 0; i < k; i++) {
|
||||
recov[i] = efence_buffs[decode_index[i]];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||
ec_encode_data(size, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
if (0 !=
|
||||
memcmp(temp_buffs[k + i], efence_buffs[src_err_list[i]],
|
||||
size)) {
|
||||
printf("Efence: Fail error recovery (%d, %d, %d)\n", m,
|
||||
k, nerrs);
|
||||
|
||||
printf("size = %d\n", size);
|
||||
|
||||
printf("Test erase list = ");
|
||||
for (j = 0; j < nerrs; j++)
|
||||
printf(" %d", src_err_list[j]);
|
||||
printf(" - Index = ");
|
||||
for (p = 0; p < k; p++)
|
||||
printf(" %d", decode_index[p]);
|
||||
printf("\nencode_matrix:\n");
|
||||
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||
printf("\ndecode_matrix:\n");
|
||||
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buffs[k + i], align);
|
||||
printf("orig :");
|
||||
dump(efence_buffs[src_err_list[i]], align);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Test rand ptr alignment if available
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
while ((m = (rand() % MMAX)) < 2) ;
|
||||
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||
|
||||
if (m > MMAX || k > KMAX)
|
||||
continue;
|
||||
|
||||
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15;
|
||||
|
||||
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||
// Add random offsets
|
||||
for (i = 0; i < m; i++) {
|
||||
memset(buffs[i], 0, TEST_LEN); // zero pad to check write-over
|
||||
memset(temp_buffs[i], 0, TEST_LEN); // zero pad to check write-over
|
||||
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
temp_ubuffs[i] = temp_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
}
|
||||
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
ubuffs[i][j] = rand();
|
||||
|
||||
// The matrix generated by gf_gen_cauchy1_matrix
|
||||
// is always invertable.
|
||||
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||
|
||||
// Make parity vects
|
||||
// Generate g_tbls from encode matrix a
|
||||
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||
// Perform matrix dot_prod for EC encoding
|
||||
// using g_tbls from encode matrix a
|
||||
ec_encode_data(size, k, m - k, g_tbls, ubuffs, &ubuffs[k]);
|
||||
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||
|
||||
// Generate decode matrix
|
||||
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||
invert_matrix, decode_index, src_err_list,
|
||||
src_in_err, nerrs, nsrcerrs, k, m);
|
||||
if (re != 0) {
|
||||
printf("Fail to gf_gen_decode_matrix\n");
|
||||
return -1;
|
||||
}
|
||||
// Pack recovery array as list of valid sources
|
||||
// Its order must be the same as the order
|
||||
// to generate matrix b in gf_gen_decode_matrix
|
||||
for (i = 0; i < k; i++) {
|
||||
recov[i] = ubuffs[decode_index[i]];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||
ec_encode_data(size, k, nerrs, g_tbls, recov, &temp_ubuffs[k]);
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
if (0 != memcmp(temp_ubuffs[k + i], ubuffs[src_err_list[i]], size)) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (j = 0; j < nerrs; j++)
|
||||
printf(" %d", src_err_list[j]);
|
||||
printf(" - Index = ");
|
||||
for (p = 0; p < k; p++)
|
||||
printf(" %d", decode_index[p]);
|
||||
printf("\nencode_matrix:\n");
|
||||
dump_u8xu8((unsigned char *)encode_matrix, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((unsigned char *)invert_matrix, k, k);
|
||||
printf("\ndecode_matrix:\n");
|
||||
dump_u8xu8((unsigned char *)decode_matrix, m, k);
|
||||
printf("orig data:\n");
|
||||
dump_matrix(ubuffs, m, 25);
|
||||
printf("orig :");
|
||||
dump(ubuffs[src_err_list[i]], 25);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_ubuffs[k + i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Confirm that padding around dests is unchanged
|
||||
memset(temp_buffs[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||
|
||||
for (i = 0; i < m; i++) {
|
||||
|
||||
offset = ubuffs[i] - buffs[i];
|
||||
|
||||
if (memcmp(buffs[i], temp_buffs[0], offset)) {
|
||||
printf("Fail rand ualign encode pad start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp
|
||||
(buffs[i] + offset + size, temp_buffs[0],
|
||||
PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign encode pad end\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
offset = temp_ubuffs[k + i] - temp_buffs[k + i];
|
||||
if (memcmp(temp_buffs[k + i], temp_buffs[0], offset)) {
|
||||
printf("Fail rand ualign decode pad start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp
|
||||
(temp_buffs[k + i] + offset + size, temp_buffs[0],
|
||||
PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign decode pad end\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test size alignment
|
||||
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 13 : 16;
|
||||
|
||||
for (size = TEST_LEN; size > 0; size -= align) {
|
||||
while ((m = (rand() % MMAX)) < 2) ;
|
||||
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||
|
||||
if (m > MMAX || k > KMAX)
|
||||
continue;
|
||||
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// The matrix generated by gf_gen_cauchy1_matrix
|
||||
// is always invertable.
|
||||
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||
|
||||
// Make parity vects
|
||||
// Generate g_tbls from encode matrix a
|
||||
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||
// Perform matrix dot_prod for EC encoding
|
||||
// using g_tbls from encode matrix a
|
||||
ec_encode_data(size, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||
// Generate decode matrix
|
||||
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||
invert_matrix, decode_index, src_err_list,
|
||||
src_in_err, nerrs, nsrcerrs, k, m);
|
||||
if (re != 0) {
|
||||
printf("Fail to gf_gen_decode_matrix\n");
|
||||
return -1;
|
||||
}
|
||||
// Pack recovery array as list of valid sources
|
||||
// Its order must be the same as the order
|
||||
// to generate matrix b in gf_gen_decode_matrix
|
||||
for (i = 0; i < k; i++) {
|
||||
recov[i] = buffs[decode_index[i]];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||
ec_encode_data(size, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
|
||||
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], size)) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (j = 0; j < nerrs; j++)
|
||||
printf(" %d", src_err_list[j]);
|
||||
printf(" - Index = ");
|
||||
for (p = 0; p < k; p++)
|
||||
printf(" %d", decode_index[p]);
|
||||
printf("\nencode_matrix:\n");
|
||||
dump_u8xu8((unsigned char *)encode_matrix, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((unsigned char *)invert_matrix, k, k);
|
||||
printf("\ndecode_matrix:\n");
|
||||
dump_u8xu8((unsigned char *)decode_matrix, m, k);
|
||||
printf("orig data:\n");
|
||||
dump_matrix(buffs, m, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buffs[k + i], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
printf("done EC tests: Pass\n");
|
||||
return 0;
|
||||
}
|
234
erasure/src/gf-2vect-dot-prod-avx.asm
Normal file
234
erasure/src/gf-2vect-dot-prod-avx.asm
Normal file
@ -0,0 +1,234 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_2vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 3*16 + 3*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_reg r12, 3*16 + 0*8
|
||||
save_reg r13, 3*16 + 1*8
|
||||
save_reg r14, 3*16 + 2*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm8, [rsp + 2*16]
|
||||
mov r12, [rsp + 3*16 + 0*8]
|
||||
mov r13, [rsp + 3*16 + 1*8]
|
||||
mov r14, [rsp + 3*16 + 2*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest1 arg4
|
||||
|
||||
%define vec_i tmp2
|
||||
%define ptr tmp3
|
||||
%define dest2 tmp4
|
||||
%define pos return
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/store
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm8
|
||||
%define xgft1_lo xmm7
|
||||
%define xgft1_hi xmm6
|
||||
%define xgft2_lo xmm5
|
||||
%define xgft2_hi xmm4
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
|
||||
align 16
|
||||
global gf_2vect_dot_prod_avx:function
|
||||
|
||||
;;; gf_2vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests)
;;; Walks the sources in 16-byte steps. For every step it combines the
;;; matching 16 bytes of each of the `vec` source buffers with two sets of
;;; 32-byte nibble lookup tables (set B starts vec*32 bytes after set A) and
;;; stores one 16-byte result in each of the two destination buffers.
;;; A length that is not a multiple of 16 is finished with one extra pass
;;; over the last 16 bytes, overlapping bytes that were already written.
;;; Returns 0 on success, or 1 without writing anything when len < 16.
func(gf_2vect_dot_prod_avx)
|
||||
;; Platform-specific prologue (see the FUNC_SAVE macro for this output format)
FUNC_SAVE
|
||||
;; len now holds length-16: the last offset at which a full 16-byte block fits
sub len, 16
|
||||
;; Fewer than 16 bytes in total: report failure
jl .return_fail
|
||||
xor pos, pos
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
;; Fetch both destination pointers from the dests array. dest2 is read first
;; because the next instruction overwrites the array pointer held in dest1.
mov dest2, [dest1+PS]
|
||||
mov dest1, [dest1]
|
||||
|
||||
;; Outer loop: one 16-byte block of output per iteration
.loop16
|
||||
;; Clear both accumulators
vpxor xp1, xp1
|
||||
vpxor xp2, xp2
|
||||
;; Restart at the first source's tables and the first source pointer
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
|
||||
;; Inner loop: fold one source buffer into both accumulators
.next_vect
|
||||
;; ptr = pointer to the current source buffer
mov ptr, [src+vec_i]
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
;; vec is already scaled by PS, so vec*(32/PS) is 32 bytes per source:
;; the second table set begins right after the first set's tables
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
;; Advance to the next source's 32-byte table pair and source pointer
add tmp, 32
|
||||
add vec_i, PS
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
;; Repeat until every source has been folded in
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
;; Continue while another full block fits (pos <= length-16)
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
;; tmp = original length; pos == length means there is no partial tail left
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_2vect_dot_prod_avx, 02, 03, 0191
|
246
erasure/src/gf-2vect-dot-prod-avx2.asm
Normal file
246
erasure/src/gf-2vect-dot-prod-avx2.asm
Normal file
@ -0,0 +1,246 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_2vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 3*16 + 3*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
vmovdqa [rsp + 0*16], xmm6
|
||||
vmovdqa [rsp + 1*16], xmm7
|
||||
vmovdqa [rsp + 2*16], xmm8
|
||||
save_reg r12, 3*16 + 0*8
|
||||
save_reg r13, 3*16 + 1*8
|
||||
save_reg r14, 3*16 + 2*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm8, [rsp + 2*16]
|
||||
mov r12, [rsp + 3*16 + 0*8]
|
||||
mov r13, [rsp + 3*16 + 1*8]
|
||||
mov r14, [rsp + 3*16 + 2*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest1 arg4
|
||||
|
||||
%define vec_i tmp2
|
||||
%define ptr tmp3
|
||||
%define dest2 tmp4
|
||||
%define pos return
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
|
||||
;;; Use Non-temporal load/store
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f ymm8
|
||||
%define xmask0fx xmm8
|
||||
%define xgft1_lo ymm7
|
||||
%define xgft1_hi ymm6
|
||||
%define xgft2_lo ymm5
|
||||
%define xgft2_hi ymm4
|
||||
|
||||
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xp1 ymm2
|
||||
%define xp2 ymm3
|
||||
|
||||
align 16
|
||||
global gf_2vect_dot_prod_avx2:function
|
||||
|
||||
;;; gf_2vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests)
;;; Walks the sources in 32-byte steps. For every step it combines the
;;; matching 32 bytes of each of the `vec` source buffers with two sets of
;;; 32-byte nibble lookup tables (set B starts vec*32 bytes after set A) and
;;; stores one 32-byte result in each of the two destination buffers.
;;; A length that is not a multiple of 32 is finished with one extra pass
;;; over the last 32 bytes, overlapping bytes that were already written.
;;; Returns 0 on success, or 1 without writing anything when len < 32.
func(gf_2vect_dot_prod_avx2)
|
||||
;; Platform-specific prologue (see the FUNC_SAVE macro for this output format)
FUNC_SAVE
|
||||
;; len now holds length-32: the last offset at which a full 32-byte block fits
sub len, 32
|
||||
;; Fewer than 32 bytes in total: report failure
jl .return_fail
|
||||
xor pos, pos
|
||||
;; Build the nibble mask in registers: put 0x0f in byte 0, then broadcast it
mov tmp.b, 0x0f
|
||||
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
|
||||
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
;; Fetch both destination pointers from the dests array. dest2 is read first
;; because the next instruction overwrites the array pointer held in dest1.
mov dest2, [dest1+PS]
|
||||
mov dest1, [dest1]
|
||||
|
||||
;; Outer loop: one 32-byte block of output per iteration
.loop32
|
||||
;; Clear both accumulators
vpxor xp1, xp1
|
||||
vpxor xp2, xp2
|
||||
;; Restart at the first source's tables and the first source pointer
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
|
||||
;; Inner loop: fold one source buffer into both accumulators
.next_vect
|
||||
;; ptr = pointer to the current source buffer
mov ptr, [src+vec_i]
|
||||
|
||||
;; One 32-byte load brings in both halves of the table pair; the two
;; permutes below copy each 16-byte half into both lanes of its register
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo
|
||||
|
||||
;; vec is already scaled by PS, so vec*(32/PS) is 32 bytes per source:
;; the second table set begins right after the first set's tables
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
|
||||
|
||||
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
;; Advance to the next source's 32-byte table pair and source pointer
add tmp, 32
|
||||
add vec_i, PS
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
;; Repeat until every source has been folded in
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
|
||||
add pos, 32 ;Loop on 32 bytes at a time
|
||||
;; Continue while another full block fits (pos <= length-32)
cmp pos, len
|
||||
jle .loop32
|
||||
|
||||
;; tmp = original length; pos == length means there is no partial tail left
lea tmp, [len + 32]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-32
|
||||
jmp .loop32 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_2vect_dot_prod_avx2, 04, 03, 0196
|
216
erasure/src/gf-2vect-dot-prod-sse-perf.c
Normal file
216
erasure/src/gf-2vect-dot-prod-sse-perf.c
Normal file
@ -0,0 +1,216 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/tests.h"
|
||||
|
||||
#ifndef FUNCTION_UNDER_TEST
|
||||
# define FUNCTION_UNDER_TEST gf_2vect_dot_prod_sse
|
||||
#endif
|
||||
|
||||
#define str(s) #s
|
||||
#define xstr(s) str(s)
|
||||
|
||||
//#define CACHED_TEST
|
||||
#ifdef CACHED_TEST
|
||||
// Cached test, loop many times over small dataset
|
||||
# define TEST_SOURCES 10
|
||||
# define TEST_LEN 8*1024
|
||||
# define TEST_LOOPS 40000
|
||||
# define TEST_TYPE_STR "_warm"
|
||||
#else
|
||||
# ifndef TEST_CUSTOM
|
||||
// Uncached test. Pull from large mem base.
|
||||
# define TEST_SOURCES 10
|
||||
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
|
||||
# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
|
||||
# define TEST_LOOPS 100
|
||||
# define TEST_TYPE_STR "_cold"
|
||||
# else
|
||||
# define TEST_TYPE_STR "_cus"
|
||||
# ifndef TEST_LOOPS
|
||||
# define TEST_LOOPS 1000
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
/*
 * Print the first len bytes of buf to stdout as hex values.
 * Each byte is printed as a space followed by a two-column hex field; a line
 * break is emitted after every 32nd byte and once more at the end.
 */
void dump(unsigned char *buf, int len)
{
	int idx = 0;

	while (idx < len) {
		unsigned int byte = buf[idx] & 0xffu;

		idx++;
		printf(" %2x", byte);
		if (idx % 32 == 0)
			putchar('\n');
	}
	putchar('\n');
}
|
||||
|
||||
/*
 * Print a k x m view of an array of buffers to stdout.
 * Row r shows the first m bytes of s[r] in hex; every row ends with a line
 * break and the whole matrix is followed by one more line break.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col;

		for (col = 0; col < m; col++)
			printf(" %2x", s[row][col]);
		putchar('\n');
		row++;
	}
	putchar('\n');
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j;
|
||||
void *buf;
|
||||
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g_tbls[2 * TEST_SOURCES * 32];
|
||||
u8 *dest1, *dest2, *dest_ref1, *dest_ref2, *dest_ptrs[2];
|
||||
u8 *buffs[TEST_SOURCES];
|
||||
struct perf start, stop;
|
||||
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest2 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref2 = buf;
|
||||
|
||||
dest_ptrs[0] = dest1;
|
||||
dest_ptrs[1] = dest2;
|
||||
|
||||
// Performance test
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
memset(dest1, 0, TEST_LEN);
|
||||
memset(dest2, 0, TEST_LEN);
|
||||
memset(dest_ref1, 0, TEST_LEN);
|
||||
memset(dest_ref2, 0, TEST_LEN);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
}
|
||||
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||
dest_ref2);
|
||||
|
||||
#ifdef DO_REF_PERF
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS / 100; i++) {
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||
buffs, dest_ref2);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf("gf_2vect_dot_prod_base" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 2) * i);
|
||||
#endif
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS; i++) {
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||
}
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 2) * i);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("pass perf check\n");
|
||||
return 0;
|
||||
|
||||
}
|
477
erasure/src/gf-2vect-dot-prod-sse-test.c
Normal file
477
erasure/src/gf-2vect-dot-prod-sse-test.c
Normal file
@ -0,0 +1,477 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/types.h"
|
||||
|
||||
#ifndef FUNCTION_UNDER_TEST
|
||||
# define FUNCTION_UNDER_TEST gf_2vect_dot_prod_sse
|
||||
#endif
|
||||
#ifndef TEST_MIN_SIZE
|
||||
# define TEST_MIN_SIZE 16
|
||||
#endif
|
||||
|
||||
#define str(s) #s
|
||||
#define xstr(s) str(s)
|
||||
|
||||
#define TEST_LEN 8192
|
||||
#define TEST_SIZE (TEST_LEN/2)
|
||||
#define TEST_MEM TEST_SIZE
|
||||
#define TEST_LOOPS 10000
|
||||
#define TEST_TYPE_STR ""
|
||||
|
||||
#ifndef TEST_SOURCES
|
||||
# define TEST_SOURCES 16
|
||||
#endif
|
||||
#ifndef RANDOMS
|
||||
# define RANDOMS 20
|
||||
#endif
|
||||
|
||||
#ifdef EC_ALIGNED_ADDR
|
||||
// Define power of 2 range to check ptr, len alignment
|
||||
# define PTR_ALIGN_CHK_B 0
|
||||
# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
|
||||
#else
|
||||
// Define power of 2 range to check ptr, len alignment
|
||||
# define PTR_ALIGN_CHK_B 32
|
||||
# define LEN_ALIGN_CHK_B 32 // 0 for aligned only
|
||||
#endif
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
/*
 * Print the first len bytes of buf to stdout as hex values.
 * Each byte is printed as a space followed by a two-column hex field; a line
 * break is emitted after every 32nd byte and once more at the end.
 */
void dump(unsigned char *buf, int len)
{
	int idx = 0;

	while (idx < len) {
		unsigned int byte = buf[idx] & 0xffu;

		idx++;
		printf(" %2x", byte);
		if (idx % 32 == 0)
			putchar('\n');
	}
	putchar('\n');
}
|
||||
|
||||
/*
 * Print a k x m view of an array of buffers to stdout.
 * Row r shows the first m bytes of s[r] in hex; every row ends with a line
 * break and the whole matrix is followed by one more line break.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col;

		for (col = 0; col < m; col++)
			printf(" %2x", s[row][col]);
		putchar('\n');
		row++;
	}
	putchar('\n');
}
|
||||
|
||||
/*
 * Print a k x m byte matrix stored contiguously in row-major order.
 * Element (i, j) is s[i * m + j]; each row is printed in hex on its own line
 * and the matrix is followed by one extra line break.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	const unsigned char *cursor = s;	// walks the matrix in storage order
	int row, col;

	for (row = 0; row < k; row++) {
		for (col = 0; col < m; col++) {
			unsigned int byte = *cursor++ & 0xffu;

			printf(" %2x", byte);
		}
		putchar('\n');
	}
	putchar('\n');
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, rtest, srcs;
|
||||
void *buf;
|
||||
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g_tbls[2 * TEST_SOURCES * 32];
|
||||
u8 *dest1, *dest2, *dest_ref1, *dest_ref2, *dest_ptrs[2];
|
||||
u8 *buffs[TEST_SOURCES];
|
||||
|
||||
int align, size;
|
||||
unsigned char *efence_buffs[TEST_SOURCES];
|
||||
unsigned int offset;
|
||||
u8 *ubuffs[TEST_SOURCES];
|
||||
u8 *udest_ptrs[2];
|
||||
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest2 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref2 = buf;
|
||||
|
||||
dest_ptrs[0] = dest1;
|
||||
dest_ptrs[1] = dest2;
|
||||
|
||||
// Test of all zeros
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
|
||||
memset(dest1, 0, TEST_LEN);
|
||||
memset(dest2, 0, TEST_LEN);
|
||||
memset(dest_ref1, 0, TEST_LEN);
|
||||
memset(dest_ref2, 0, TEST_LEN);
|
||||
memset(g1, 2, TEST_SOURCES);
|
||||
memset(g2, 1, TEST_SOURCES);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||
dest_ref2);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
|
||||
// Rand data test
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||
buffs, dest_ref2);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Rand data test with varied parameters
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs,
|
||||
dest_ref2);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test1 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test2 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||
efence_buffs, dest_ref2);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 != memcmp(dest_ref2, dest2, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test rand ptr alignment if available
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||
srcs = rand() % TEST_SOURCES;
|
||||
if (srcs == 0)
|
||||
continue;
|
||||
|
||||
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||
// Add random offsets
|
||||
for (i = 0; i < srcs; i++)
|
||||
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
memset(dest1, 0, TEST_LEN); // zero pad to check write-over
|
||||
memset(dest2, 0, TEST_LEN);
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
ubuffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs);
|
||||
|
||||
if (memcmp(dest_ref1, udest_ptrs[0], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[0], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref2, udest_ptrs[1], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[1], 25);
|
||||
return -1;
|
||||
}
|
||||
// Confirm that padding around dests is unchanged
|
||||
memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||
offset = udest_ptrs[0] - dest1;
|
||||
|
||||
if (memcmp(dest1, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad1 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad1 end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
offset = udest_ptrs[1] - dest2;
|
||||
if (memcmp(dest2, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad2 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad2 end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test all size alignment
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
|
||||
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||
srcs = TEST_SOURCES;
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (memcmp(dest_ref1, dest_ptrs[0], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[0], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref2, dest_ptrs[1], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[1], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
printf("Pass\n");
|
||||
return 0;
|
||||
|
||||
}
|
236
erasure/src/gf-2vect-dot-prod-sse.asm
Normal file
236
erasure/src/gf-2vect-dot-prod-sse.asm
Normal file
@ -0,0 +1,236 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_2vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
;;; ABI glue for gf_2vect_dot_prod_sse.
;;; Maps the five C arguments (arg0-arg4) and the scratch registers onto the
;;; elf64 or win64 register set, and defines the FUNC_SAVE / FUNC_RESTORE
;;; prologue and epilogue that preserve the registers the routine overwrites.
%ifidn __OUTPUT_FORMAT__, elf64
 %define arg0   rdi
 %define arg1   rsi
 %define arg2   rdx
 %define arg3   rcx
 %define arg4   r8
 %define arg5   r9

 %define tmp    r11
 %define tmp2   r10
 %define tmp3   r9          ; same register as arg5; the routine only takes arg0-arg4
 %define tmp4   r12         ; must be saved and restored
 %define return rax
 %define PS     8           ; stride used to step through the pointer arrays
 %define LOG_PS 3           ; log2(PS)

 %define func(x) x:
 %macro FUNC_SAVE 0
    push    r12
 %endmacro
 %macro FUNC_RESTORE 0
    pop     r12
 %endmacro
%endif

%ifidn __OUTPUT_FORMAT__, win64
 %define arg0   rcx
 %define arg1   rdx
 %define arg2   r8
 %define arg3   r9

 %define arg4   r12         ; must be saved, loaded and restored
 %define tmp    r11
 %define tmp2   r10
 %define tmp3   r13         ; must be saved and restored
 %define tmp4   r14         ; must be saved and restored
 %define return rax
 %define PS     8           ; stride used to step through the pointer arrays
 %define LOG_PS 3           ; log2(PS)
 %define stack_size  3*16 + 3*8     ; must be an odd multiple of 8
 %define arg(x)      [rsp + stack_size + PS + PS*x]

 %define func(x) proc_frame x
 ;; Save xmm6-xmm8 and r12-r14 (all overwritten by the routine), then fetch
 ;; the fifth argument from the caller's stack into arg4.
 %macro FUNC_SAVE 0
    alloc_stack stack_size
    save_xmm128 xmm6, 0*16
    save_xmm128 xmm7, 1*16
    save_xmm128 xmm8, 2*16
    save_reg    r12,  3*16 + 0*8
    save_reg    r13,  3*16 + 1*8
    save_reg    r14,  3*16 + 2*8
    end_prolog
    mov     arg4, arg(4)
 %endmacro

 ;; Reload everything FUNC_SAVE stored and release the stack frame.
 %macro FUNC_RESTORE 0
    movdqa  xmm6, [rsp + 0*16]
    movdqa  xmm7, [rsp + 1*16]
    movdqa  xmm8, [rsp + 2*16]
    mov     r12,  [rsp + 3*16 + 0*8]
    mov     r13,  [rsp + 3*16 + 1*8]
    mov     r14,  [rsp + 3*16 + 2*8]
    add     rsp, stack_size
 %endmacro
%endif

;;; Symbolic names for the arguments and working registers.
%define len       arg0
%define vec       arg1
%define mul_array arg2
%define src       arg3
%define dest1     arg4      ; holds the dests array pointer on entry, then dests[0]

%define vec_i     tmp2
%define ptr       tmp3
%define dest2     tmp4
%define pos       return    ; the byte offset lives in rax until the return code is set

%ifndef EC_ALIGNED_ADDR
;;; Use un-aligned load/store
 %define XLDR movdqu
 %define XSTR movdqu
%else
;;; Buffers are declared aligned: use aligned load/store when NO_NT_LDST is
;;; defined, otherwise non-temporal load/store
 %ifdef NO_NT_LDST
  %define XLDR movdqa
  %define XSTR movdqa
 %else
  %define XLDR movntdqa
  %define XSTR movntdq
 %endif
%endif
|
||||
|
||||
|
||||
default rel

[bits 64]
section .text

;;; SSE register roles
%define xmask0f   xmm8      ; 0x0f in every byte
%define xgft1_lo  xmm7      ; output 1: table for the low nibble
%define xgft1_hi  xmm6      ; output 1: table for the high nibble
%define xgft2_lo  xmm5      ; output 2: table for the low nibble
%define xgft2_hi  xmm4      ; output 2: table for the high nibble

%define x0        xmm0      ; source bytes, then their high nibbles
%define xtmpa     xmm1      ; low nibbles of the source bytes
%define xp1       xmm2      ; accumulator for output 1
%define xp2       xmm3      ; accumulator for output 2

;;;
;;; gf_2vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; Accumulates two outputs from the same `vec` source buffers, using
;;; 32-byte nibble lookup tables (16 bytes for the low nibble followed by
;;; 16 bytes for the high nibble) per source per output. The tables for
;;; output 2 start vec*32 bytes after those for output 1.
;;;
;;; Works on 16 bytes per pass. If len is not a multiple of 16 the final pass
;;; is re-run on the last 16 bytes, overlapping bytes already written.
;;;
;;; Returns 0 in rax on success, or 1 without touching the outputs when
;;; len < 16.
;;;
align 16
global gf_2vect_dot_prod_sse:function

func(gf_2vect_dot_prod_sse)
    FUNC_SAVE
    sub     len, 16                     ;len now holds the offset of the last full block
    jl      .return_fail
    xor     pos, pos
    movdqa  xmask0f, [mask0f]           ;Load mask of lower nibble in each byte
    sal     vec, LOG_PS                 ;vec *= PS. Make vec_i count by PS
    mov     dest2, [dest1+PS]           ;dests[1]
    mov     dest1, [dest1]              ;dests[0]; must be loaded last, it overwrites the array pointer

.loop16:
    pxor    xp1, xp1
    pxor    xp2, xp2
    mov     tmp, mul_array
    xor     vec_i, vec_i

.next_vect:
    mov     ptr, [src+vec_i]

    movdqu  xgft1_lo, [tmp]                 ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
    movdqu  xgft1_hi, [tmp+16]              ;     "     Ax{00}, Ax{10}, ..., Ax{f0}
    movdqu  xgft2_lo, [tmp+vec*(32/PS)]     ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
    movdqu  xgft2_hi, [tmp+vec*(32/PS)+16]  ;     "     Bx{00}, Bx{10}, ..., Bx{f0}

    XLDR    x0, [ptr+pos]               ;Get next source vector
    add     tmp, 32
    add     vec_i, PS

    movdqa  xtmpa, x0                   ;Keep unshifted copy of src
    psraw   x0, 4                       ;Shift to put high nibble into bits 3-0
    pand    x0, xmask0f                 ;Mask high src nibble in bits 3-0
    pand    xtmpa, xmask0f              ;Mask low src nibble in bits 3-0

    pshufb  xgft1_hi, x0                ;Lookup mul table of high nibble
    pshufb  xgft1_lo, xtmpa             ;Lookup mul table of low nibble
    pxor    xgft1_hi, xgft1_lo          ;GF add high and low partials
    pxor    xp1, xgft1_hi               ;xp1 += partial

    pshufb  xgft2_hi, x0                ;Lookup mul table of high nibble
    pshufb  xgft2_lo, xtmpa             ;Lookup mul table of low nibble
    pxor    xgft2_hi, xgft2_lo          ;GF add high and low partials
    pxor    xp2, xgft2_hi               ;xp2 += partial

    cmp     vec_i, vec
    jl      .next_vect

    XSTR    [dest1+pos], xp1
    XSTR    [dest2+pos], xp2

    add     pos, 16                     ;Loop on 16 bytes at a time
    cmp     pos, len
    jle     .loop16

    lea     tmp, [len + 16]             ;tmp = original length
    cmp     pos, tmp
    je      .return_pass                ;pos landed exactly on the end: done

    ;; Tail len
    mov     pos, len                    ;Overlapped offset length-16
    jmp     .loop16                     ;Do one more overlap pass

.return_pass:
    mov     return, 0
    FUNC_RESTORE
    ret

.return_fail:
    mov     return, 1
    FUNC_RESTORE
    ret

endproc_frame
|
||||
|
||||
section .data

;;; 16-byte constant with 0x0f in every byte; selects the low nibble of each
;;; source byte. Kept 16-byte aligned because it is read with movdqa.
align 16
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f

;;; slversion name, a, b, c
;;; Exports the symbols <name>_slver and <name>_slver_<a><b><c>, both labelling
;;; a 4-byte version record laid out as: dw 0x<c>, then db 0x<b>, 0x<a>.
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
    dw 0x%4
    db 0x%3, 0x%2
%endmacro
;;;       func                   core, ver, snum
slversion gf_2vect_dot_prod_sse, 00,   02,  0062
|
258
erasure/src/gf-3vect-dot-prod-avx.asm
Normal file
258
erasure/src/gf-3vect-dot-prod-avx.asm
Normal file
@ -0,0 +1,258 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_3vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
;;; ABI glue for gf_3vect_dot_prod_avx.
;;; Maps the five C arguments (arg0-arg4), one extra working register (arg5)
;;; and the scratch registers onto the elf64 or win64 register set, and
;;; defines the FUNC_SAVE / FUNC_RESTORE prologue and epilogue that preserve
;;; the registers the routine overwrites.
%ifidn __OUTPUT_FORMAT__, elf64
 %define arg0   rdi
 %define arg1   rsi
 %define arg2   rdx
 %define arg3   rcx
 %define arg4   r8
 %define arg5   r9

 %define tmp    r11
 %define tmp2   r10
 %define tmp3   r13         ; must be saved and restored
 %define tmp4   r12         ; must be saved and restored
 %define return rax
 %define PS     8           ; stride used to step through the pointer arrays
 %define LOG_PS 3           ; log2(PS)

 %define func(x) x:
 %macro FUNC_SAVE 0
    push    r12
    push    r13
 %endmacro
 %macro FUNC_RESTORE 0
    pop     r13
    pop     r12
 %endmacro
%endif

%ifidn __OUTPUT_FORMAT__, win64
 %define arg0   rcx
 %define arg1   rdx
 %define arg2   r8
 %define arg3   r9

 %define arg4   r12         ; must be saved, loaded and restored
 %define arg5   r15         ; must be saved and restored
 %define tmp    r11
 %define tmp2   r10
 %define tmp3   r13         ; must be saved and restored
 %define tmp4   r14         ; must be saved and restored
 %define return rax
 %define PS     8           ; stride used to step through the pointer arrays
 %define LOG_PS 3           ; log2(PS)
 ;; Six xmm slots plus five 8-byte slots; only four of the 8-byte slots are
 ;; written below, the fifth keeps the size an odd multiple of 8.
 %define stack_size  6*16 + 5*8     ; must be an odd multiple of 8
 %define arg(x)      [rsp + stack_size + PS + PS*x]

 %define func(x) proc_frame x
 ;; Save xmm6-xmm11 and r12-r15 (all overwritten by the routine), then fetch
 ;; the fifth argument from the caller's stack into arg4.
 %macro FUNC_SAVE 0
    alloc_stack stack_size
    save_xmm128 xmm6,  0*16
    save_xmm128 xmm7,  1*16
    save_xmm128 xmm8,  2*16
    save_xmm128 xmm9,  3*16
    save_xmm128 xmm10, 4*16
    save_xmm128 xmm11, 5*16
    save_reg    r12,   6*16 + 0*8
    save_reg    r13,   6*16 + 1*8
    save_reg    r14,   6*16 + 2*8
    save_reg    r15,   6*16 + 3*8
    end_prolog
    mov     arg4, arg(4)
 %endmacro

 ;; Reload everything FUNC_SAVE stored and release the stack frame.
 %macro FUNC_RESTORE 0
    vmovdqa xmm6,  [rsp + 0*16]
    vmovdqa xmm7,  [rsp + 1*16]
    vmovdqa xmm8,  [rsp + 2*16]
    vmovdqa xmm9,  [rsp + 3*16]
    vmovdqa xmm10, [rsp + 4*16]
    vmovdqa xmm11, [rsp + 5*16]
    mov     r12,   [rsp + 6*16 + 0*8]
    mov     r13,   [rsp + 6*16 + 1*8]
    mov     r14,   [rsp + 6*16 + 2*8]
    mov     r15,   [rsp + 6*16 + 3*8]
    add     rsp, stack_size
 %endmacro
%endif

;;; Symbolic names for the arguments and working registers.
%define len       arg0
%define vec       arg1
%define mul_array arg2
%define src       arg3
%define dest1     arg4      ; holds the dests array pointer on entry, then dests[0]
%define ptr       arg5
%define vec_i     tmp2
%define dest2     tmp3
%define dest3     tmp4
%define pos       return    ; the byte offset lives in rax until the return code is set

%ifndef EC_ALIGNED_ADDR
;;; Use un-aligned load/store
 %define XLDR vmovdqu
 %define XSTR vmovdqu
%else
;;; Buffers are declared aligned: use aligned load/store when NO_NT_LDST is
;;; defined, otherwise non-temporal load/store
 %ifdef NO_NT_LDST
  %define XLDR vmovdqa
  %define XSTR vmovdqa
 %else
  %define XLDR vmovntdqa
  %define XSTR vmovntdq
 %endif
%endif
|
||||
|
||||
|
||||
default rel

[bits 64]
section .text

;;; AVX (128-bit) register roles
%define xmask0f   xmm11     ; 0x0f in every byte
%define xgft1_lo  xmm10     ; output 1: table for the low nibble
%define xgft1_hi  xmm9      ; output 1: table for the high nibble
%define xgft2_lo  xmm8      ; output 2: table for the low nibble
%define xgft2_hi  xmm7      ; output 2: table for the high nibble
%define xgft3_lo  xmm6      ; output 3: table for the low nibble
%define xgft3_hi  xmm5      ; output 3: table for the high nibble

%define x0        xmm0      ; source bytes, then their high nibbles
%define xtmpa     xmm1      ; low nibbles of the source bytes
%define xp1       xmm2      ; accumulator for output 1
%define xp2       xmm3      ; accumulator for output 2
%define xp3       xmm4      ; accumulator for output 3

;;;
;;; gf_3vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; Accumulates three outputs from the same `vec` source buffers, using
;;; 32-byte nibble lookup tables (16 bytes for the low nibble followed by
;;; 16 bytes for the high nibble) per source per output. The tables for
;;; outputs 2 and 3 start vec*32 and vec*64 bytes after those for output 1.
;;;
;;; Works on 16 bytes per pass. If len is not a multiple of 16 the final pass
;;; is re-run on the last 16 bytes, overlapping bytes already written.
;;;
;;; Returns 0 in rax on success, or 1 without touching the outputs when
;;; len < 16.
;;;
align 16
global gf_3vect_dot_prod_avx:function
func(gf_3vect_dot_prod_avx)
    FUNC_SAVE
    sub     len, 16                     ;len now holds the offset of the last full block
    jl      .return_fail
    xor     pos, pos
    vmovdqa xmask0f, [mask0f]           ;Load mask of lower nibble in each byte
    sal     vec, LOG_PS                 ;vec *= PS. Make vec_i count by PS
    mov     dest2, [dest1+PS]           ;dests[1]
    mov     dest3, [dest1+2*PS]         ;dests[2]
    mov     dest1, [dest1]              ;dests[0]; must be loaded last, it overwrites the array pointer


.loop16:
    vpxor   xp1, xp1
    vpxor   xp2, xp2
    vpxor   xp3, xp3
    mov     tmp, mul_array
    xor     vec_i, vec_i

.next_vect:
    mov     ptr, [src+vec_i]

    vmovdqu xgft1_lo, [tmp]                 ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
    vmovdqu xgft1_hi, [tmp+16]              ;     "     Ax{00}, Ax{10}, ..., Ax{f0}
    vmovdqu xgft2_lo, [tmp+vec*(32/PS)]     ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
    vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16]  ;     "     Bx{00}, Bx{10}, ..., Bx{f0}
    vmovdqu xgft3_lo, [tmp+vec*(64/PS)]     ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
    vmovdqu xgft3_hi, [tmp+vec*(64/PS)+16]  ;     "     Cx{00}, Cx{10}, ..., Cx{f0}

    add     tmp, 32
    add     vec_i, PS
    XLDR    x0, [ptr+pos]               ;Get next source vector

    vpand   xtmpa, x0, xmask0f          ;Mask low src nibble in bits 3-0
    vpsraw  x0, x0, 4                   ;Shift to put high nibble into bits 3-0
    vpand   x0, x0, xmask0f             ;Mask high src nibble in bits 3-0

    vpshufb xgft1_hi, x0                ;Lookup mul table of high nibble
    vpshufb xgft1_lo, xtmpa             ;Lookup mul table of low nibble
    vpxor   xgft1_hi, xgft1_lo          ;GF add high and low partials
    vpxor   xp1, xgft1_hi               ;xp1 += partial

    vpshufb xgft2_hi, x0                ;Lookup mul table of high nibble
    vpshufb xgft2_lo, xtmpa             ;Lookup mul table of low nibble
    vpxor   xgft2_hi, xgft2_lo          ;GF add high and low partials
    vpxor   xp2, xgft2_hi               ;xp2 += partial

    vpshufb xgft3_hi, x0                ;Lookup mul table of high nibble
    vpshufb xgft3_lo, xtmpa             ;Lookup mul table of low nibble
    vpxor   xgft3_hi, xgft3_lo          ;GF add high and low partials
    vpxor   xp3, xgft3_hi               ;xp3 += partial

    cmp     vec_i, vec
    jl      .next_vect

    XSTR    [dest1+pos], xp1
    XSTR    [dest2+pos], xp2
    XSTR    [dest3+pos], xp3

    add     pos, 16                     ;Loop on 16 bytes at a time
    cmp     pos, len
    jle     .loop16

    lea     tmp, [len + 16]             ;tmp = original length
    cmp     pos, tmp
    je      .return_pass                ;pos landed exactly on the end: done

    ;; Tail len
    mov     pos, len                    ;Overlapped offset length-16
    jmp     .loop16                     ;Do one more overlap pass

.return_pass:
    mov     return, 0
    FUNC_RESTORE
    ret

.return_fail:
    mov     return, 1
    FUNC_RESTORE
    ret

endproc_frame
|
||||
|
||||
section .data

;;; 16-byte constant with 0x0f in every byte; selects the low nibble of each
;;; source byte. Kept 16-byte aligned because it is read with vmovdqa.
align 16
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f

;;; slversion name, a, b, c
;;; Exports the symbols <name>_slver and <name>_slver_<a><b><c>, both labelling
;;; a 4-byte version record laid out as: dw 0x<c>, then db 0x<b>, 0x<a>.
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
    dw 0x%4
    db 0x%3, 0x%2
%endmacro
;;;       func                   core, ver, snum
slversion gf_3vect_dot_prod_avx, 02,   03,  0192
|
271
erasure/src/gf-3vect-dot-prod-avx2.asm
Normal file
271
erasure/src/gf-3vect-dot-prod-avx2.asm
Normal file
@ -0,0 +1,271 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_3vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
;;; ABI glue for gf_3vect_dot_prod_avx2.
;;; Maps the five C arguments (arg0-arg4), one extra working register (arg5)
;;; and the scratch registers onto the elf64 or win64 register set, and
;;; defines the FUNC_SAVE / FUNC_RESTORE prologue and epilogue that preserve
;;; the registers the routine overwrites.
%ifidn __OUTPUT_FORMAT__, elf64
 %define arg0   rdi
 %define arg1   rsi
 %define arg2   rdx
 %define arg3   rcx
 %define arg4   r8
 %define arg5   r9

 %define tmp    r11
 %define tmp.w  r11d        ; 32-bit view of tmp
 %define tmp.b  r11b        ; 8-bit view of tmp
 %define tmp2   r10
 %define tmp3   r13         ; must be saved and restored
 %define tmp4   r12         ; must be saved and restored
 %define return rax
 %define PS     8           ; stride used to step through the pointer arrays
 %define LOG_PS 3           ; log2(PS)

 %define func(x) x:
 %macro FUNC_SAVE 0
    push    r12
    push    r13
 %endmacro
 %macro FUNC_RESTORE 0
    pop     r13
    pop     r12
 %endmacro
%endif

%ifidn __OUTPUT_FORMAT__, win64
 %define arg0   rcx
 %define arg1   rdx
 %define arg2   r8
 %define arg3   r9

 %define arg4   r12         ; must be saved, loaded and restored
 %define arg5   r15         ; must be saved and restored
 %define tmp    r11
 %define tmp.w  r11d        ; 32-bit view of tmp
 %define tmp.b  r11b        ; 8-bit view of tmp
 %define tmp2   r10
 %define tmp3   r13         ; must be saved and restored
 %define tmp4   r14         ; must be saved and restored
 %define return rax
 %define PS     8           ; stride used to step through the pointer arrays
 %define LOG_PS 3           ; log2(PS)
 ;; Six xmm slots plus five 8-byte slots; only four of the 8-byte slots are
 ;; written below, the fifth keeps the size an odd multiple of 8.
 %define stack_size  6*16 + 5*8     ; must be an odd multiple of 8
 %define arg(x)      [rsp + stack_size + PS + PS*x]

 %define func(x) proc_frame x
 ;; Save xmm6-xmm11 and r12-r15 (all overwritten by the routine), then fetch
 ;; the fifth argument from the caller's stack into arg4.
 %macro FUNC_SAVE 0
    alloc_stack stack_size
    vmovdqa [rsp + 0*16], xmm6
    vmovdqa [rsp + 1*16], xmm7
    vmovdqa [rsp + 2*16], xmm8
    vmovdqa [rsp + 3*16], xmm9
    vmovdqa [rsp + 4*16], xmm10
    vmovdqa [rsp + 5*16], xmm11
    save_reg    r12,  6*16 + 0*8
    save_reg    r13,  6*16 + 1*8
    save_reg    r14,  6*16 + 2*8
    save_reg    r15,  6*16 + 3*8
    end_prolog
    mov     arg4, arg(4)
 %endmacro

 ;; Reload everything FUNC_SAVE stored and release the stack frame.
 %macro FUNC_RESTORE 0
    vmovdqa xmm6,  [rsp + 0*16]
    vmovdqa xmm7,  [rsp + 1*16]
    vmovdqa xmm8,  [rsp + 2*16]
    vmovdqa xmm9,  [rsp + 3*16]
    vmovdqa xmm10, [rsp + 4*16]
    vmovdqa xmm11, [rsp + 5*16]
    mov     r12,   [rsp + 6*16 + 0*8]
    mov     r13,   [rsp + 6*16 + 1*8]
    mov     r14,   [rsp + 6*16 + 2*8]
    mov     r15,   [rsp + 6*16 + 3*8]
    add     rsp, stack_size
 %endmacro
%endif

;;; Symbolic names for the arguments and working registers.
%define len       arg0
%define vec       arg1
%define mul_array arg2
%define src       arg3
%define dest1     arg4      ; holds the dests array pointer on entry, then dests[0]
%define ptr       arg5
%define vec_i     tmp2
%define dest2     tmp3
%define dest3     tmp4
%define pos       return    ; the byte offset lives in rax until the return code is set

%ifndef EC_ALIGNED_ADDR
;;; Use un-aligned load/store
 %define XLDR vmovdqu
 %define XSTR vmovdqu
%else
;;; Buffers are declared aligned: use aligned load/store when NO_NT_LDST is
;;; defined, otherwise non-temporal load/store
 %ifdef NO_NT_LDST
  %define XLDR vmovdqa
  %define XSTR vmovdqa
 %else
  %define XLDR vmovntdqa
  %define XSTR vmovntdq
 %endif
%endif
|
||||
|
||||
|
||||
default rel

[bits 64]
section .text

;;; AVX2 (256-bit) register roles
%define xmask0f   ymm11     ; 0x0f in every byte
%define xmask0fx  xmm11     ; low 128 bits of xmask0f, used while building the mask
%define xgft1_lo  ymm10     ; output 1: low-nibble table in both 128-bit lanes
%define xgft1_hi  ymm9      ; output 1: high-nibble table in both 128-bit lanes
%define xgft2_lo  ymm8      ; output 2: low-nibble table in both 128-bit lanes
%define xgft2_hi  ymm7      ; output 2: high-nibble table in both 128-bit lanes
%define xgft3_lo  ymm6      ; output 3: low-nibble table in both 128-bit lanes
%define xgft3_hi  ymm5      ; output 3: high-nibble table in both 128-bit lanes

%define x0        ymm0      ; source bytes, then their high nibbles
%define xtmpa     ymm1      ; low nibbles of the source bytes
%define xp1       ymm2      ; accumulator for output 1
%define xp2       ymm3      ; accumulator for output 2
%define xp3       ymm4      ; accumulator for output 3

;;;
;;; gf_3vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; Accumulates three outputs from the same `vec` source buffers, using
;;; 32-byte nibble lookup tables (16 bytes for the low nibble followed by
;;; 16 bytes for the high nibble) per source per output. The tables for
;;; outputs 2 and 3 start vec*32 and vec*64 bytes after those for output 1.
;;;
;;; Works on 32 bytes per pass. If len is not a multiple of 32 the final pass
;;; is re-run on the last 32 bytes, overlapping bytes already written.
;;;
;;; Returns 0 in rax on success, or 1 without touching the outputs when
;;; len < 32.
;;;
align 16
global gf_3vect_dot_prod_avx2:function
func(gf_3vect_dot_prod_avx2)
    FUNC_SAVE
    sub     len, 32                     ;len now holds the offset of the last full block
    jl      .return_fail
    xor     pos, pos
    mov     tmp.b, 0x0f
    vpinsrb xmask0fx, xmask0fx, tmp.w, 0
    vpbroadcastb xmask0f, xmask0fx      ;Construct mask 0x0f0f0f...

    sal     vec, LOG_PS                 ;vec *= PS. Make vec_i count by PS
    mov     dest2, [dest1+PS]           ;dests[1]
    mov     dest3, [dest1+2*PS]         ;dests[2]
    mov     dest1, [dest1]              ;dests[0]; must be loaded last, it overwrites the array pointer


.loop32:
    vpxor   xp1, xp1
    vpxor   xp2, xp2
    vpxor   xp3, xp3
    mov     tmp, mul_array
    xor     vec_i, vec_i

.next_vect:
    mov     ptr, [src+vec_i]

    ;; Each 32-byte load fetches a whole table: low-nibble half in the low
    ;; lane, high-nibble half in the high lane. The vperm2i128 pairs then
    ;; replicate each half into both lanes.
    vmovdqu xgft1_lo, [tmp]                         ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
                                                    ;     "     Ax{00}, Ax{10}, ..., Ax{f0}
    vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11   ; swapped to hi | hi
    vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00   ; swapped to lo | lo

    vmovdqu xgft2_lo, [tmp+vec*(32/PS)]             ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
                                                    ;     "     Bx{00}, Bx{10}, ..., Bx{f0}
    vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11   ; swapped to hi | hi
    vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00   ; swapped to lo | lo

    vmovdqu xgft3_lo, [tmp+vec*(64/PS)]             ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
                                                    ;     "     Cx{00}, Cx{10}, ..., Cx{f0}
    vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x11   ; swapped to hi | hi
    vperm2i128 xgft3_lo, xgft3_lo, xgft3_lo, 0x00   ; swapped to lo | lo

    add     tmp, 32
    add     vec_i, PS
    XLDR    x0, [ptr+pos]               ;Get next source vector

    vpand   xtmpa, x0, xmask0f          ;Mask low src nibble in bits 3-0
    vpsraw  x0, x0, 4                   ;Shift to put high nibble into bits 3-0
    vpand   x0, x0, xmask0f             ;Mask high src nibble in bits 3-0

    vpshufb xgft1_hi, x0                ;Lookup mul table of high nibble
    vpshufb xgft1_lo, xtmpa             ;Lookup mul table of low nibble
    vpxor   xgft1_hi, xgft1_lo          ;GF add high and low partials
    vpxor   xp1, xgft1_hi               ;xp1 += partial

    vpshufb xgft2_hi, x0                ;Lookup mul table of high nibble
    vpshufb xgft2_lo, xtmpa             ;Lookup mul table of low nibble
    vpxor   xgft2_hi, xgft2_lo          ;GF add high and low partials
    vpxor   xp2, xgft2_hi               ;xp2 += partial

    vpshufb xgft3_hi, x0                ;Lookup mul table of high nibble
    vpshufb xgft3_lo, xtmpa             ;Lookup mul table of low nibble
    vpxor   xgft3_hi, xgft3_lo          ;GF add high and low partials
    vpxor   xp3, xgft3_hi               ;xp3 += partial

    cmp     vec_i, vec
    jl      .next_vect

    XSTR    [dest1+pos], xp1
    XSTR    [dest2+pos], xp2
    XSTR    [dest3+pos], xp3

    add     pos, 32                     ;Loop on 32 bytes at a time
    cmp     pos, len
    jle     .loop32

    lea     tmp, [len + 32]             ;tmp = original length
    cmp     pos, tmp
    je      .return_pass                ;pos landed exactly on the end: done

    ;; Tail len
    mov     pos, len                    ;Overlapped offset length-32
    jmp     .loop32                     ;Do one more overlap pass

.return_pass:
    mov     return, 0
    FUNC_RESTORE
    ret

.return_fail:
    mov     return, 1
    FUNC_RESTORE
    ret

endproc_frame
|
||||
|
||||
section .data

;;; slversion name, a, b, c
;;; Exports the symbols <name>_slver and <name>_slver_<a><b><c>, both labelling
;;; a 4-byte version record laid out as: dw 0x<c>, then db 0x<b>, 0x<a>.
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
    dw 0x%4
    db 0x%3, 0x%2
%endmacro
;;;       func                    core, ver, snum
slversion gf_3vect_dot_prod_avx2, 04,   03,  0197
|
246
erasure/src/gf-3vect-dot-prod-sse-perf.c
Normal file
246
erasure/src/gf-3vect-dot-prod-sse-perf.c
Normal file
@ -0,0 +1,246 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/tests.h"
|
||||
|
||||
/* Routine being benchmarked; override with -DFUNCTION_UNDER_TEST=<name>. */
#ifndef FUNCTION_UNDER_TEST
# define FUNCTION_UNDER_TEST gf_3vect_dot_prod_sse
#endif

/* Two-level stringification so macro arguments are expanded before quoting. */
#define str(s) #s
#define xstr(s) str(s)

/*
 * Benchmark configuration. Exactly one of three modes is selected:
 *   CACHED_TEST  - small data set looped many times ("_warm")
 *   (default)    - data set larger than GT_L3_CACHE, few loops ("_cold")
 *   TEST_CUSTOM  - TEST_SOURCES and TEST_LEN are not defined here and are
 *                  presumably supplied by the build ("_cus")
 * Every arithmetic expansion is parenthesised so it can be used safely inside
 * larger expressions.
 */
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES 10
# define TEST_LEN (8*1024)
# define TEST_LOOPS 40000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES 10
#  define GT_L3_CACHE (32*1024*1024)	/* some number > last level cache */
/* Per-source length: the cache-busting total split across sources, rounded down to 64 bytes */
#  define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
#  define TEST_LOOPS 100
#  define TEST_TYPE_STR "_cold"
# else
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#   define TEST_LOOPS 1000
#  endif
# endif
#endif

typedef unsigned char u8;
|
||||
|
||||
/*
 * Print the first len bytes of buf to stdout as hex values, starting a new
 * line after every 32 bytes and ending with a newline. Nothing but the final
 * newline is printed when len <= 0.
 */
void dump(unsigned char *buf, int len)
{
	int i;

	for (i = 0; i < len; i++) {
		/* %x expects unsigned int; the value is the byte itself */
		printf(" %2x", (unsigned int)buf[i]);
		if ((i + 1) % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Print a k x m view of an array of row pointers: the first m bytes of each of
 * the first k rows of s, one row per output line, followed by a blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		for (col = 0; col < m; col++)
			printf(" %2x", (unsigned int)s[row][col]);
		printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j;
|
||||
void *buf;
|
||||
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
|
||||
u8 g_tbls[3 * TEST_SOURCES * 32], *dest_ptrs[3], *buffs[TEST_SOURCES];
|
||||
u8 *dest1, *dest2, *dest3, *dest_ref1, *dest_ref2, *dest_ref3;
|
||||
struct perf start, stop;
|
||||
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest2 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest3 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref2 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref3 = buf;
|
||||
|
||||
dest_ptrs[0] = dest1;
|
||||
dest_ptrs[1] = dest2;
|
||||
dest_ptrs[2] = dest3;
|
||||
|
||||
// Performance test
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
memset(dest1, 0, TEST_LEN);
|
||||
memset(dest2, 0, TEST_LEN);
|
||||
memset(dest_ref1, 0, TEST_LEN);
|
||||
memset(dest_ref2, 0, TEST_LEN);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
}
|
||||
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||
dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
|
||||
dest_ref3);
|
||||
|
||||
#ifdef DO_REF_PERF
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS / 100; i++) {
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||
buffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||
buffs, dest_ref3);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf("gf_3vect_dot_prod_base" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 3) * i);
|
||||
#endif
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS; i++) {
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||
}
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 3) * i);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test3\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("pass perf check\n");
|
||||
return 0;
|
||||
|
||||
}
|
583
erasure/src/gf-3vect-dot-prod-sse-test.c
Normal file
583
erasure/src/gf-3vect-dot-prod-sse-test.c
Normal file
@ -0,0 +1,583 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/types.h"
|
||||
|
||||
/* Routine being verified; override with -DFUNCTION_UNDER_TEST=<name>. */
#ifndef FUNCTION_UNDER_TEST
# define FUNCTION_UNDER_TEST gf_3vect_dot_prod_sse
#endif
/* Smallest length exercised by the size sweeps */
#ifndef TEST_MIN_SIZE
# define TEST_MIN_SIZE 16
#endif

/* Two-level stringification so macro arguments are expanded before quoting. */
#define str(s) #s
#define xstr(s) str(s)

#define TEST_LEN 8192		/* bytes allocated per buffer */
#define TEST_SIZE (TEST_LEN/2)	/* largest length used by the end-of-buffer sweep */
#define TEST_MEM TEST_SIZE
#define TEST_LOOPS 10000
#define TEST_TYPE_STR ""

/* Number of source buffers */
#ifndef TEST_SOURCES
# define TEST_SOURCES 16
#endif
/* Iterations of each randomized test */
#ifndef RANDOMS
# define RANDOMS 20
#endif

#ifdef EC_ALIGNED_ADDR
// Define power of 2 range to check ptr, len alignment
# define PTR_ALIGN_CHK_B 0
# define LEN_ALIGN_CHK_B 0	// 0 for aligned only
#else
// Define power of 2 range to check ptr, len alignment
# define PTR_ALIGN_CHK_B 32
# define LEN_ALIGN_CHK_B 32	// 0 for aligned only
#endif

typedef unsigned char u8;
|
||||
|
||||
/*
 * Print the first len bytes of buf to stdout as hex values, starting a new
 * line after every 32 bytes and ending with a newline. Nothing but the final
 * newline is printed when len <= 0.
 */
void dump(unsigned char *buf, int len)
{
	int i;

	for (i = 0; i < len; i++) {
		/* %x expects unsigned int; the value is the byte itself */
		printf(" %2x", (unsigned int)buf[i]);
		if ((i + 1) % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Print a k x m view of an array of row pointers: the first m bytes of each of
 * the first k rows of s, one row per output line, followed by a blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		for (col = 0; col < m; col++)
			printf(" %2x", (unsigned int)s[row][col]);
		printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Print a k x m matrix stored row-major in the flat array s (element (i, j)
 * is s[i * m + j]), one row per output line, followed by a blank line.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		const unsigned char *line = s + row * m;

		for (col = 0; col < m; col++)
			printf(" %2x", (unsigned int)line[col]);
		printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, rtest, srcs;
|
||||
void *buf;
|
||||
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
|
||||
u8 g_tbls[3 * TEST_SOURCES * 32], *dest_ptrs[3], *buffs[TEST_SOURCES];
|
||||
u8 *dest1, *dest2, *dest3, *dest_ref1, *dest_ref2, *dest_ref3;
|
||||
|
||||
int align, size;
|
||||
unsigned char *efence_buffs[TEST_SOURCES];
|
||||
unsigned int offset;
|
||||
u8 *ubuffs[TEST_SOURCES];
|
||||
u8 *udest_ptrs[3];
|
||||
printf(xstr(FUNCTION_UNDER_TEST) "_test: %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest2 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest3 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");;
|
||||
return -1;
|
||||
}
|
||||
dest_ref2 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref3 = buf;
|
||||
|
||||
dest_ptrs[0] = dest1;
|
||||
dest_ptrs[1] = dest2;
|
||||
dest_ptrs[2] = dest3;
|
||||
|
||||
// Test of all zeros
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
|
||||
memset(dest1, 0, TEST_LEN);
|
||||
memset(dest2, 0, TEST_LEN);
|
||||
memset(dest3, 0, TEST_LEN);
|
||||
memset(dest_ref1, 0, TEST_LEN);
|
||||
memset(dest_ref2, 0, TEST_LEN);
|
||||
memset(dest_ref3, 0, TEST_LEN);
|
||||
memset(g1, 2, TEST_SOURCES);
|
||||
memset(g2, 1, TEST_SOURCES);
|
||||
memset(g3, 7, TEST_SOURCES);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||
dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
|
||||
dest_ref3);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail zero" xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
|
||||
// Rand data test
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||
buffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||
buffs, dest_ref3);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Rand data test with varied parameters
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs,
|
||||
dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs,
|
||||
dest_ref3);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test1 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test2 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test3 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||
efence_buffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||
efence_buffs, dest_ref3);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 != memcmp(dest_ref2, dest2, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 != memcmp(dest_ref3, dest3, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test rand ptr alignment if available
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||
srcs = rand() % TEST_SOURCES;
|
||||
if (srcs == 0)
|
||||
continue;
|
||||
|
||||
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||
// Add random offsets
|
||||
for (i = 0; i < srcs; i++)
|
||||
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
memset(dest1, 0, TEST_LEN); // zero pad to check write-over
|
||||
memset(dest2, 0, TEST_LEN);
|
||||
memset(dest3, 0, TEST_LEN);
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
ubuffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs);
|
||||
|
||||
if (memcmp(dest_ref1, udest_ptrs[0], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[0], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref2, udest_ptrs[1], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[1], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref3, udest_ptrs[2], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[2], 25);
|
||||
return -1;
|
||||
}
|
||||
// Confirm that padding around dests is unchanged
|
||||
memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||
offset = udest_ptrs[0] - dest1;
|
||||
|
||||
if (memcmp(dest1, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad1 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad1 end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
offset = udest_ptrs[1] - dest2;
|
||||
if (memcmp(dest2, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad2 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad2 end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
offset = udest_ptrs[2] - dest3;
|
||||
if (memcmp(dest3, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad3 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad3 end\n");;
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test all size alignment
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
|
||||
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||
srcs = TEST_SOURCES;
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (memcmp(dest_ref1, dest_ptrs[0], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[0], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref2, dest_ptrs[1], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[1], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref3, dest_ptrs[2], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[2], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
printf("Pass\n");
|
||||
return 0;
|
||||
|
||||
}
|
259
erasure/src/gf-3vect-dot-prod-sse.asm
Normal file
259
erasure/src/gf-3vect-dot-prod-sse.asm
Normal file
@ -0,0 +1,259 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_3vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
;;; Register assignment and prologue/epilogue used when assembling for elf64.
;;; arg0..arg4 carry the five parameters listed in the prototype comment above;
;;; arg5 (r9) is reused as a scratch register through the `ptr` alias further
;;; down. r12 and r13 are pushed by FUNC_SAVE because tmp3/tmp4 map onto them,
;;; and FUNC_RESTORE pops them in the reverse order.
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
;; PS is the pointer size in bytes, LOG_PS its base-2 logarithm
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
;; On elf64 a function entry is just a plain label
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
;;; Register assignment and prologue/epilogue used when assembling for win64.
;;; Only four parameters arrive in registers here, so the fifth is loaded from
;;; the caller's stack into r12 by FUNC_SAVE. FUNC_SAVE reserves stack_size
;;; bytes and stores xmm6-xmm11 and r12-r15 in it; FUNC_RESTORE reloads them
;;; from the same slots and releases the area.
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define return rax
|
||||
;; PS is the pointer size in bytes, LOG_PS its base-2 logarithm
%define PS 8
|
||||
%define LOG_PS 3
|
||||
;; Six 16-byte xmm slots followed by five 8-byte slots (four are used for r12-r15)
%define stack_size 6*16 + 5*8 ; must be an odd multiple of 8
|
||||
;; Stack argument x as seen after alloc_stack: skips the local area and one
;; more PS-sized slot (presumably the return address), then indexes by x
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
;; proc_frame / alloc_stack / save_xmm128 / save_reg / end_prolog are supplied
;; by the assembler, presumably its win64 unwind-information directives
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_xmm128 xmm9, 3*16
|
||||
save_xmm128 xmm10, 4*16
|
||||
save_xmm128 xmm11, 5*16
|
||||
save_reg r12, 6*16 + 0*8
|
||||
save_reg r13, 6*16 + 1*8
|
||||
save_reg r14, 6*16 + 2*8
|
||||
save_reg r15, 6*16 + 3*8
|
||||
end_prolog
|
||||
mov arg4, arg(4) ; fetch the fifth parameter from the stack
|
||||
%endmacro
|
||||
|
||||
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp + 0*16]
|
||||
movdqa xmm7, [rsp + 1*16]
|
||||
movdqa xmm8, [rsp + 2*16]
|
||||
movdqa xmm9, [rsp + 3*16]
|
||||
movdqa xmm10, [rsp + 4*16]
|
||||
movdqa xmm11, [rsp + 5*16]
|
||||
mov r12, [rsp + 6*16 + 0*8]
|
||||
mov r13, [rsp + 6*16 + 1*8]
|
||||
mov r14, [rsp + 6*16 + 2*8]
|
||||
mov r15, [rsp + 6*16 + 3*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
;;; Symbolic names for the parameters and scratch registers. dest1 initially
;;; holds the `dests` pointer array and is replaced by dests[0] in the function
;;; body; pos shares the return register and is overwritten with the status
;;; code before returning.
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest1 arg4
|
||||
%define ptr arg5
|
||||
%define vec_i tmp2
|
||||
%define dest2 tmp3
|
||||
%define dest3 tmp4
|
||||
%define pos return
|
||||
|
||||
;;; Select the instructions behind XLDR (source load) and XSTR (result store):
;;;   EC_ALIGNED_ADDR undefined      -> movdqu / movdqu
;;;   EC_ALIGNED_ADDR + NO_NT_LDST   -> movdqa / movdqa
;;;   EC_ALIGNED_ADDR only           -> movntdqa / movntdq
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR movdqu
|
||||
%define XSTR movdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/store
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
;;; gf_3vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; Produces three output buffers in one pass over `vec` source buffers.
;;; For every 16-byte column at offset pos, each source block is split into low
;;; and high nibbles; the nibbles index that source's pair of 16-byte tables
;;; with pshufb, and the two lookups are XORed into the accumulators xp1/xp2/xp3.
;;; The accumulators are then stored to dests[0], dests[1] and dests[2] at pos.
;;;
;;; Table layout as read below: source i of output k uses the 32 bytes at
;;; g_tbls + k*32*vec + i*32 (low-nibble table first, high-nibble table second).
;;;
;;; Returns 0 in rax on success, or 1 when len < 16 (nothing is written).
;;; When len is not a multiple of 16, one extra pass is made at offset len-16,
;;; overlapping bytes that were already produced.
;;; Note: the inner loop is bottom-tested, so it runs once even if vec is 0.
%define xmask0f xmm11
|
||||
%define xgft1_lo xmm10
|
||||
%define xgft1_hi xmm9
|
||||
%define xgft2_lo xmm8
|
||||
%define xgft2_hi xmm7
|
||||
%define xgft3_lo xmm6
|
||||
%define xgft3_hi xmm5
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%define xp3 xmm4
|
||||
|
||||
align 16
|
||||
global gf_3vect_dot_prod_sse:function
|
||||
func(gf_3vect_dot_prod_sse)
|
||||
FUNC_SAVE
|
||||
sub len, 16 ;len now holds the last valid 16-byte block offset
|
||||
jl .return_fail ;Fewer than 16 bytes: fail
|
||||
xor pos, pos
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
mov dest2, [dest1+PS] ;dest1 still points at the dests array here
|
||||
mov dest3, [dest1+2*PS]
|
||||
mov dest1, [dest1] ;Loaded last because it overwrites the array pointer
|
||||
|
||||
|
||||
;; Outer loop: one 16-byte output column per iteration
.loop16:
|
||||
pxor xp1, xp1 ;Clear the three accumulators
|
||||
pxor xp2, xp2
|
||||
pxor xp3, xp3
|
||||
mov tmp, mul_array ;tmp walks the tables of the first output
|
||||
xor vec_i, vec_i
|
||||
|
||||
;; Inner loop: fold one source buffer into all three accumulators
.next_vect:
|
||||
mov ptr, [src+vec_i] ;Pointer to the current source buffer
|
||||
|
||||
;; vec was scaled by PS above, so vec*(32/PS) equals 32 * (source count)
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
movdqu xgft1_hi, [tmp+16] ;Load array Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ;Load array Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
movdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
movdqu xgft3_hi, [tmp+vec*(64/PS)+16] ;Load array Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
|
||||
add tmp, 32 ;Advance to the next source's table pair
|
||||
add vec_i, PS
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 3-0 (word-wise shift; other bits are masked off next)
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 3-0
|
||||
|
||||
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
pxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
pxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
pxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect ;More source buffers for this column
|
||||
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
XSTR [dest3+pos], xp3
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16] ;tmp = original length
|
||||
cmp pos, tmp
|
||||
je .return_pass ;Every byte has been produced
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
;;; 16-byte constant with 0x0f in every byte. It is loaded with movdqa (hence
;;; the 16-byte alignment) and ANDed with the source data to isolate one nibble
;;; per byte.
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
;;; slversion func, core, ver, snum
;;; Emits a 4-byte version record for <func> and exports it under two global
;;; labels that share one address: <func>_slver and <func>_slver_<core><ver><snum>.
;;; Record layout as emitted below: one 16-bit word 0x<snum>, then the bytes
;;; 0x<ver> and 0x<core>. The arguments are pasted as hex digits.
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_3vect_dot_prod_sse, 00, 03, 0063
|
296
erasure/src/gf-4vect-dot-prod-avx.asm
Normal file
296
erasure/src/gf-4vect-dot-prod-avx.asm
Normal file
@ -0,0 +1,296 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_4vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
;; elf64 output format: register assignment for the arguments and temporaries,
;; plus the save/restore macros for the registers marked "must be saved and
;; restored" below (r12-r15).
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
;; PS is the size of a pointer in bytes, LOG_PS its base-2 logarithm
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
;; func(name) only emits the label "name:" for this output format
%define func(x) x:
|
||||
;; FUNC_SAVE pushes r12-r15; FUNC_RESTORE pops them in the reverse order
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
;; win64 output format: register assignment, stack frame layout and the
;; prologue/epilogue macros. Only arg0-arg3 map to argument registers here;
;; arg4 is loaded from the stack by FUNC_SAVE.
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define tmp5 rdi ; must be saved and restored
|
||||
%define tmp6 rsi ; must be saved and restored
|
||||
%define return rax
|
||||
;; PS is the size of a pointer in bytes, LOG_PS its base-2 logarithm
%define PS 8
|
||||
%define LOG_PS 3
|
||||
;; Frame: nine 16-byte xmm slots followed by seven 8-byte slots (200 bytes).
;; FUNC_SAVE writes only six of the 8-byte slots.
%define stack_size 9*16 + 7*8 ; must be an odd multiple of 8
|
||||
;; arg(x): the x-th 8-byte slot located one pointer size above the frame
;; allocated by FUNC_SAVE (presumably skipping the return address -- confirm
;; against the Win64 calling convention).
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
;; FUNC_SAVE: allocate the frame, store xmm6-xmm14 and r12-r15, rdi, rsi into
;; it, close the prologue and load the fifth argument (arg4) from the stack.
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_xmm128 xmm9, 3*16
|
||||
save_xmm128 xmm10, 4*16
|
||||
save_xmm128 xmm11, 5*16
|
||||
save_xmm128 xmm12, 6*16
|
||||
save_xmm128 xmm13, 7*16
|
||||
save_xmm128 xmm14, 8*16
|
||||
save_reg r12, 9*16 + 0*8
|
||||
save_reg r13, 9*16 + 1*8
|
||||
save_reg r14, 9*16 + 2*8
|
||||
save_reg r15, 9*16 + 3*8
|
||||
save_reg rdi, 9*16 + 4*8
|
||||
save_reg rsi, 9*16 + 5*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
;; FUNC_RESTORE: reload every register stored by FUNC_SAVE from the same
;; offsets, then release the frame.
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm8, [rsp + 2*16]
|
||||
vmovdqa xmm9, [rsp + 3*16]
|
||||
vmovdqa xmm10, [rsp + 4*16]
|
||||
vmovdqa xmm11, [rsp + 5*16]
|
||||
vmovdqa xmm12, [rsp + 6*16]
|
||||
vmovdqa xmm13, [rsp + 7*16]
|
||||
vmovdqa xmm14, [rsp + 8*16]
|
||||
mov r12, [rsp + 9*16 + 0*8]
|
||||
mov r13, [rsp + 9*16 + 1*8]
|
||||
mov r14, [rsp + 9*16 + 2*8]
|
||||
mov r15, [rsp + 9*16 + 3*8]
|
||||
mov rdi, [rsp + 9*16 + 4*8]
|
||||
mov rsi, [rsp + 9*16 + 5*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
|
||||
;; Symbolic names for the arguments and temporaries used by the routine.
;; len, vec, mul_array, src and dest1 are the five incoming arguments;
;; dest1 initially holds the address of the destination pointer array.
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest1 arg4
|
||||
;; ptr: current source buffer pointer (arg5 is not an incoming argument)
%define ptr arg5
|
||||
%define vec_i tmp2
|
||||
%define dest2 tmp3
|
||||
%define dest3 tmp4
|
||||
%define dest4 tmp5
|
||||
%define vskip3 tmp6
|
||||
;; pos shares rax with the return value
%define pos return
|
||||
|
||||
;; XLDR / XSTR: load and store instructions used for the source and
;; destination buffers, selected at assembly time.
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use unaligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Addresses are aligned: use non-temporal load/store unless NO_NT_LDST is defined
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
;; xmask0f: 0x0f byte mask; xgftN_lo / xgftN_hi: low- and high-nibble
;; multiplication tables for output N
%define xmask0f xmm14
|
||||
%define xgft1_lo xmm13
|
||||
%define xgft1_hi xmm12
|
||||
%define xgft2_lo xmm11
|
||||
%define xgft2_hi xmm10
|
||||
%define xgft3_lo xmm9
|
||||
%define xgft3_hi xmm8
|
||||
%define xgft4_lo xmm7
|
||||
%define xgft4_hi xmm6
|
||||
|
||||
|
||||
;; x0: source bytes / high nibbles, xtmpa: low nibbles, xpN: accumulator for output N
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%define xp3 xmm4
|
||||
%define xp4 xmm5
|
||||
|
||||
;;; gf_4vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; For each 16-byte position, combines the vec source buffers into four
;;; outputs: every source byte is split into its two nibbles, each nibble is
;;; looked up in a 16-byte table with vpshufb, and the results are XORed into
;;; the accumulator of the corresponding output.
;;; mul_array holds a 32-byte table per source for each output (low-nibble
;;; half first, high-nibble half at +16); the four groups start at byte
;;; offsets 0, vec*32, vec*64 and vec*96.
;;; Returns 0 in rax on success and 1 when len is smaller than 16.
;;; A len that is not a multiple of 16 is completed by one extra pass over the
;;; last 16 bytes, which overlaps bytes that were already written.
align 16
|
||||
global gf_4vect_dot_prod_avx:function
|
||||
func(gf_4vect_dot_prod_avx)
|
||||
FUNC_SAVE
|
||||
sub len, 16 ;len now holds the offset of the last full 16-byte block
|
||||
jl .return_fail ;Fewer than 16 bytes: not supported
|
||||
xor pos, pos
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
mov vskip3, vec
|
||||
imul vskip3, 96 ;vskip3 = vec * 3 * 32, offset of the 4th table group
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
mov dest2, [dest1+PS]
|
||||
mov dest3, [dest1+2*PS]
|
||||
mov dest4, [dest1+3*PS]
|
||||
mov dest1, [dest1] ;Loaded last: dest1 held the pointer array until here
|
||||
|
||||
|
||||
;; Outer loop: one 16-byte block of all four outputs per iteration
.loop16:
|
||||
vpxor xp1, xp1
|
||||
vpxor xp2, xp2
|
||||
vpxor xp3, xp3
|
||||
vpxor xp4, xp4
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
|
||||
;; Inner loop: accumulate the contribution of one source buffer
.next_vect:
|
||||
mov ptr, [src+vec_i]
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
vmovdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
vmovdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
add tmp, 32 ;Advance to the tables of the next source
|
||||
add vec_i, PS
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
vpxor xp4, xgft4_hi ;xp4 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
XSTR [dest3+pos], xp3
|
||||
XSTR [dest4+pos], xp4
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16] ;tmp = length as passed by the caller
|
||||
cmp pos, tmp
|
||||
je .return_pass ;Every byte has been processed
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
;; Nibble mask loaded into xmask0f by gf_4vect_dot_prod_avx: 0x0f in every byte
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
;; slversion name, core, ver, snum
;; Exports two global labels, <name>_slver and <name>_slver_<core><ver><snum>,
;; both placed on the same four data bytes: snum as a 16-bit word, followed by
;; the ver byte and then the core byte. The arguments are pasted after "0x",
;; so they are read as hexadecimal digits.
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_4vect_dot_prod_avx, 00, 02, 0064
|
305
erasure/src/gf-4vect-dot-prod-avx2.asm
Normal file
305
erasure/src/gf-4vect-dot-prod-avx2.asm
Normal file
@ -0,0 +1,305 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_4vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
;; elf64 output format: register assignment for the arguments and temporaries,
;; plus the save/restore macros for the registers marked "must be saved and
;; restored" below (r12-r15).
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
;; tmp.w and tmp.b name the 32-bit and 8-bit views of tmp (r11)
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
;; PS is the size of a pointer in bytes, LOG_PS its base-2 logarithm
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
;; func(name) only emits the label "name:" for this output format
%define func(x) x:
|
||||
;; FUNC_SAVE pushes r12-r15; FUNC_RESTORE pops them in the reverse order
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
;; win64 output format: register assignment, stack frame layout and the
;; prologue/epilogue macros. Only arg0-arg3 map to argument registers here;
;; arg4 is loaded from the stack by FUNC_SAVE.
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
;; tmp.w and tmp.b name the 32-bit and 8-bit views of tmp (r11)
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define tmp5 rdi ; must be saved and restored
|
||||
%define tmp6 rsi ; must be saved and restored
|
||||
%define return rax
|
||||
;; PS is the size of a pointer in bytes, LOG_PS its base-2 logarithm
%define PS 8
|
||||
%define LOG_PS 3
|
||||
;; Frame: nine 16-byte xmm slots followed by seven 8-byte slots (200 bytes).
;; FUNC_SAVE writes only six of the 8-byte slots.
%define stack_size 9*16 + 7*8 ; must be an odd multiple of 8
|
||||
;; arg(x): the x-th 8-byte slot located one pointer size above the frame
;; allocated by FUNC_SAVE (presumably skipping the return address -- confirm
;; against the Win64 calling convention).
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
;; FUNC_SAVE: allocate the frame, store xmm6-xmm14 (with plain vmovdqa stores)
;; and r12-r15, rdi, rsi into it, close the prologue and load the fifth
;; argument (arg4) from the stack.
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
vmovdqa [rsp + 0*16], xmm6
|
||||
vmovdqa [rsp + 1*16], xmm7
|
||||
vmovdqa [rsp + 2*16], xmm8
|
||||
vmovdqa [rsp + 3*16], xmm9
|
||||
vmovdqa [rsp + 4*16], xmm10
|
||||
vmovdqa [rsp + 5*16], xmm11
|
||||
vmovdqa [rsp + 6*16], xmm12
|
||||
vmovdqa [rsp + 7*16], xmm13
|
||||
vmovdqa [rsp + 8*16], xmm14
|
||||
save_reg r12, 9*16 + 0*8
|
||||
save_reg r13, 9*16 + 1*8
|
||||
save_reg r14, 9*16 + 2*8
|
||||
save_reg r15, 9*16 + 3*8
|
||||
save_reg rdi, 9*16 + 4*8
|
||||
save_reg rsi, 9*16 + 5*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
;; FUNC_RESTORE: reload every register stored by FUNC_SAVE from the same
;; offsets, then release the frame.
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm8, [rsp + 2*16]
|
||||
vmovdqa xmm9, [rsp + 3*16]
|
||||
vmovdqa xmm10, [rsp + 4*16]
|
||||
vmovdqa xmm11, [rsp + 5*16]
|
||||
vmovdqa xmm12, [rsp + 6*16]
|
||||
vmovdqa xmm13, [rsp + 7*16]
|
||||
vmovdqa xmm14, [rsp + 8*16]
|
||||
mov r12, [rsp + 9*16 + 0*8]
|
||||
mov r13, [rsp + 9*16 + 1*8]
|
||||
mov r14, [rsp + 9*16 + 2*8]
|
||||
mov r15, [rsp + 9*16 + 3*8]
|
||||
mov rdi, [rsp + 9*16 + 4*8]
|
||||
mov rsi, [rsp + 9*16 + 5*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
|
||||
;; Symbolic names for the arguments and temporaries used by the routine.
;; len, vec, mul_array, src and dest1 are the five incoming arguments;
;; dest1 initially holds the address of the destination pointer array.
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest1 arg4
|
||||
;; ptr: current source buffer pointer (arg5 is not an incoming argument)
%define ptr arg5
|
||||
%define vec_i tmp2
|
||||
%define dest2 tmp3
|
||||
%define dest3 tmp4
|
||||
%define dest4 tmp5
|
||||
%define vskip3 tmp6
|
||||
;; pos shares rax with the return value
%define pos return
|
||||
|
||||
;; XLDR / XSTR: load and store instructions used for the source and
;; destination buffers, selected at assembly time.
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use unaligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Addresses are aligned: use non-temporal load/store unless NO_NT_LDST is defined
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
;; xmask0f: 0x0f byte mask (xmask0fx is its low 128-bit half);
;; xgftN_lo / xgftN_hi: table registers for output N
%define xmask0f ymm14
|
||||
%define xmask0fx xmm14
|
||||
%define xgft1_lo ymm13
|
||||
%define xgft1_hi ymm12
|
||||
%define xgft2_lo ymm11
|
||||
%define xgft2_hi ymm10
|
||||
%define xgft3_lo ymm9
|
||||
%define xgft3_hi ymm8
|
||||
%define xgft4_lo ymm7
|
||||
%define xgft4_hi ymm6
|
||||
|
||||
|
||||
;; x0 / xtmpa: nibble index vectors, xpN: accumulator for output N
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xp1 ymm2
|
||||
%define xp2 ymm3
|
||||
%define xp3 ymm4
|
||||
%define xp4 ymm5
|
||||
|
||||
;;; gf_4vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; For each 32-byte position, combines the vec source buffers into four
;;; outputs: every source byte is split into its two nibbles, each nibble is
;;; looked up in a 16-byte table with vpshufb, and the results are XORed into
;;; the accumulator of the corresponding output.
;;; mul_array holds a 32-byte table per source for each output; one 256-bit
;;; load brings in the low-nibble table (lower lane) and the high-nibble table
;;; (upper lane). The four groups start at byte offsets 0, vec*32, vec*64 and
;;; vec*96.
;;; Returns 0 in rax on success and 1 when len is smaller than 32.
;;; A len that is not a multiple of 32 is completed by one extra pass over the
;;; last 32 bytes, which overlaps bytes that were already written.
align 16
|
||||
global gf_4vect_dot_prod_avx2:function
|
||||
func(gf_4vect_dot_prod_avx2)
|
||||
FUNC_SAVE
|
||||
sub len, 32 ;len now holds the offset of the last full 32-byte block
|
||||
jl .return_fail ;Fewer than 32 bytes: not supported
|
||||
xor pos, pos
|
||||
mov tmp.b, 0x0f
|
||||
vpinsrb xmask0fx, xmask0fx, tmp.w, 0 ;Place 0x0f in byte 0
|
||||
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||
mov vskip3, vec
|
||||
imul vskip3, 96 ;vskip3 = vec * 3 * 32, offset of the 4th table group
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
mov dest2, [dest1+PS]
|
||||
mov dest3, [dest1+2*PS]
|
||||
mov dest4, [dest1+3*PS]
|
||||
mov dest1, [dest1] ;Loaded last: dest1 held the pointer array until here
|
||||
|
||||
|
||||
;; Outer loop: one 32-byte block of all four outputs per iteration
.loop32:
|
||||
vpxor xp1, xp1
|
||||
vpxor xp2, xp2
|
||||
vpxor xp3, xp3
|
||||
vpxor xp4, xp4
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
|
||||
;; Inner loop: accumulate the contribution of one source buffer
.next_vect:
|
||||
mov ptr, [src+vec_i]
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
add vec_i, PS
|
||||
|
||||
;; xgft4_lo serves as a scratch register here; its table is loaded further down
vpand xgft4_lo, x0, xmask0f ;Mask low src nibble in bits 3-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||
vperm2i128 xtmpa, xgft4_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
|
||||
vperm2i128 x0, xgft4_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
|
||||
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft4_hi, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
|
||||
add tmp, 32 ;Advance to the tables of the next source
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
vpxor xp4, xgft4_hi ;xp4 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
XSTR [dest3+pos], xp3
|
||||
XSTR [dest4+pos], xp4
|
||||
|
||||
add pos, 32 ;Loop on 32 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop32
|
||||
|
||||
lea tmp, [len + 32] ;tmp = length as passed by the caller
|
||||
cmp pos, tmp
|
||||
je .return_pass ;Every byte has been processed
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-32
|
||||
jmp .loop32 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
;; slversion name, core, ver, snum
;; Exports two global labels, <name>_slver and <name>_slver_<core><ver><snum>,
;; both placed on the same four data bytes: snum as a 16-bit word, followed by
;; the ver byte and then the core byte. The arguments are pasted after "0x",
;; so they are read as hexadecimal digits.
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_4vect_dot_prod_avx2, 04, 03, 0064
|
281
erasure/src/gf-4vect-dot-prod-sse-perf.c
Normal file
281
erasure/src/gf-4vect-dot-prod-sse-perf.c
Normal file
@ -0,0 +1,281 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/tests.h"
|
||||
|
||||
/* Routine being timed; override with -DFUNCTION_UNDER_TEST=<name>. */
#ifndef FUNCTION_UNDER_TEST
# define FUNCTION_UNDER_TEST gf_4vect_dot_prod_sse
#endif

/* xstr() expands its argument before turning it into a string literal. */
#define str(s) #s
#define xstr(s) str(s)

/*
 * Test sizing. Expression-valued macros are fully parenthesized so that they
 * keep their value when used next to /, % or a cast.
 */
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES 10
# define TEST_LEN (8*1024)
# define TEST_LOOPS 40000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test. Pull from large mem base.
#  define TEST_SOURCES 10
#  define GT_L3_CACHE (32*1024*1024)	/* some number > last level cache */
#  define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
#  define TEST_LOOPS 100
#  define TEST_TYPE_STR "_cold"
# else
// Custom test: TEST_SOURCES and TEST_LEN are expected from the build command line
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#   define TEST_LOOPS 1000
#  endif
# endif
#endif
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
/*
 * Print the first len bytes of buf as hexadecimal values, 32 per line,
 * and finish with a newline.
 */
void dump(unsigned char *buf, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++) {
		printf(" %2x", buf[pos] & 0xff);
		/* break the line after every 32nd value */
		if ((pos + 1) % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Print the first m bytes of each of the first k rows of s in hexadecimal,
 * one row per line, followed by an empty line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j;
|
||||
void *buf;
|
||||
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
|
||||
u8 g4[TEST_SOURCES], g_tbls[4 * TEST_SOURCES * 32], *buffs[TEST_SOURCES];
|
||||
u8 *dest1, *dest2, *dest3, *dest4, *dest_ref1, *dest_ref2, *dest_ref3;
|
||||
u8 *dest_ref4, *dest_ptrs[4];
|
||||
struct perf start, stop;
|
||||
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest2 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest3 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest4 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref2 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref3 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref4 = buf;
|
||||
|
||||
dest_ptrs[0] = dest1;
|
||||
dest_ptrs[1] = dest2;
|
||||
dest_ptrs[2] = dest3;
|
||||
dest_ptrs[3] = dest4;
|
||||
|
||||
// Performance test
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
memset(dest1, 0, TEST_LEN);
|
||||
memset(dest2, 0, TEST_LEN);
|
||||
memset(dest3, 0, TEST_LEN);
|
||||
memset(dest4, 0, TEST_LEN);
|
||||
memset(dest_ref1, 0, TEST_LEN);
|
||||
memset(dest_ref2, 0, TEST_LEN);
|
||||
memset(dest_ref3, 0, TEST_LEN);
|
||||
memset(dest_ref4, 0, TEST_LEN);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
g4[i] = rand();
|
||||
}
|
||||
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||
dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
|
||||
dest_ref3);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs,
|
||||
dest_ref4);
|
||||
|
||||
#ifdef DO_REF_PERF
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS / 100; i++) {
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||
buffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||
buffs, dest_ref3);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
|
||||
buffs, dest_ref4);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf("gf_4vect_dot_prod_base" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 4) * i);
|
||||
#endif
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS; i++) {
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
|
||||
}
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 4) * i);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test3\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test3\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest4, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("pass perf check\n");
|
||||
return 0;
|
||||
|
||||
}
|
692
erasure/src/gf-4vect-dot-prod-sse-test.c
Normal file
692
erasure/src/gf-4vect-dot-prod-sse-test.c
Normal file
@ -0,0 +1,692 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/types.h"
|
||||
|
||||
// Routine under test; may be overridden from the build command line
#ifndef FUNCTION_UNDER_TEST
|
||||
# define FUNCTION_UNDER_TEST gf_4vect_dot_prod_sse
|
||||
#endif
|
||||
// Overridable default (presumably the smallest length the routine accepts -- confirm)
#ifndef TEST_MIN_SIZE
|
||||
# define TEST_MIN_SIZE 16
|
||||
#endif
|
||||
|
||||
// xstr() expands its argument before turning it into a string literal
#define str(s) #s
|
||||
#define xstr(s) str(s)
|
||||
|
||||
// Fixed test dimensions; TEST_SIZE and TEST_MEM are both half of TEST_LEN
#define TEST_LEN 8192
|
||||
#define TEST_SIZE (TEST_LEN/2)
|
||||
#define TEST_MEM TEST_SIZE
|
||||
#define TEST_LOOPS 10000
|
||||
#define TEST_TYPE_STR ""
|
||||
|
||||
// Overridable defaults
#ifndef TEST_SOURCES
|
||||
# define TEST_SOURCES 16
|
||||
#endif
|
||||
#ifndef RANDOMS
|
||||
# define RANDOMS 20
|
||||
#endif
|
||||
|
||||
#ifdef EC_ALIGNED_ADDR
|
||||
// Define power of 2 range to check ptr, len alignment
|
||||
# define PTR_ALIGN_CHK_B 0
|
||||
# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
|
||||
#else
|
||||
// Define power of 2 range to check ptr, len alignment
|
||||
# define PTR_ALIGN_CHK_B 32
|
||||
# define LEN_ALIGN_CHK_B 32 // 0 for aligned only
|
||||
#endif
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
/*
 * Print the first len bytes of buf to stdout as hex values, each formatted
 * with " %2x".
 *
 * A newline is emitted after every 32nd byte and once more after the last
 * byte, so a length that is an exact multiple of 32 ends with a blank line.
 * Nothing but the final newline is printed when len <= 0.
 */
void dump(unsigned char *buf, int len)
{
	int i;

	for (i = 0; i < len;) {
		// %x expects an unsigned int; the mask result is a signed int.
		printf(" %2x", (unsigned int)(0xff & buf[i++]));
		if (i % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Print a k x m window of a row-pointer matrix to stdout as hex.
 *
 * s is an array of at least k row pointers; the first m bytes of each row
 * are printed on one line using " %2x". A blank line follows the last row.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int i, j;

	for (i = 0; i < k; i++) {
		for (j = 0; j < m; j++) {
			// %x expects an unsigned int, not the promoted (signed) int.
			printf(" %2x", (unsigned int)s[i][j]);
		}
		printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Print a k x m matrix stored contiguously in row-major order to stdout.
 *
 * Element (i, j) is read from s[j + i * m]. Each row is printed on its own
 * line using " %2x", and a blank line follows the last row.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int i, j;

	for (i = 0; i < k; i++) {
		for (j = 0; j < m; j++) {
			// %x expects an unsigned int; the mask result is a signed int.
			printf(" %2x", (unsigned int)(0xff & s[j + (i * m)]));
		}
		printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, rtest, srcs;
|
||||
void *buf;
|
||||
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
|
||||
u8 g4[TEST_SOURCES], g_tbls[4 * TEST_SOURCES * 32], *buffs[TEST_SOURCES];
|
||||
u8 *dest1, *dest2, *dest3, *dest4, *dest_ref1, *dest_ref2, *dest_ref3;
|
||||
u8 *dest_ref4, *dest_ptrs[4];
|
||||
|
||||
int align, size;
|
||||
unsigned char *efence_buffs[TEST_SOURCES];
|
||||
unsigned int offset;
|
||||
u8 *ubuffs[TEST_SOURCES];
|
||||
u8 *udest_ptrs[4];
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest2 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest3 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest4 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref2 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref3 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref4 = buf;
|
||||
|
||||
dest_ptrs[0] = dest1;
|
||||
dest_ptrs[1] = dest2;
|
||||
dest_ptrs[2] = dest3;
|
||||
dest_ptrs[3] = dest4;
|
||||
|
||||
// Test of all zeros
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
|
||||
memset(dest1, 0, TEST_LEN);
|
||||
memset(dest2, 0, TEST_LEN);
|
||||
memset(dest3, 0, TEST_LEN);
|
||||
memset(dest4, 0, TEST_LEN);
|
||||
memset(dest_ref1, 0, TEST_LEN);
|
||||
memset(dest_ref2, 0, TEST_LEN);
|
||||
memset(dest_ref3, 0, TEST_LEN);
|
||||
memset(dest_ref4, 0, TEST_LEN);
|
||||
memset(g1, 2, TEST_SOURCES);
|
||||
memset(g2, 1, TEST_SOURCES);
|
||||
memset(g3, 7, TEST_SOURCES);
|
||||
memset(g4, 3, TEST_SOURCES);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]);
|
||||
gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||
dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
|
||||
dest_ref3);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs,
|
||||
dest_ref4);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest4, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
|
||||
// Rand data test
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
g4[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||
buffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||
buffs, dest_ref3);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
|
||||
buffs, dest_ref4);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest4, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Rand data test with varied parameters
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
g4[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs,
|
||||
dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs,
|
||||
dest_ref3);
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs,
|
||||
dest_ref4);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test1 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test2 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test3 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test4 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest4, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 32;
|
||||
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
g4[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||
efence_buffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||
efence_buffs, dest_ref3);
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
|
||||
efence_buffs, dest_ref4);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 != memcmp(dest_ref2, dest2, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 != memcmp(dest_ref3, dest3, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 != memcmp(dest_ref4, dest4, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest4, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test rand ptr alignment if available
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||
srcs = rand() % TEST_SOURCES;
|
||||
if (srcs == 0)
|
||||
continue;
|
||||
|
||||
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||
// Add random offsets
|
||||
for (i = 0; i < srcs; i++)
|
||||
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
memset(dest1, 0, TEST_LEN); // zero pad to check write-over
|
||||
memset(dest2, 0, TEST_LEN);
|
||||
memset(dest3, 0, TEST_LEN);
|
||||
memset(dest4, 0, TEST_LEN);
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
ubuffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
g4[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs);
|
||||
|
||||
if (memcmp(dest_ref1, udest_ptrs[0], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[0], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref2, udest_ptrs[1], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[1], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref3, udest_ptrs[2], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[2], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref4, udest_ptrs[3], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[3], 25);
|
||||
return -1;
|
||||
}
|
||||
// Confirm that padding around dests is unchanged
|
||||
memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||
offset = udest_ptrs[0] - dest1;
|
||||
|
||||
if (memcmp(dest1, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad1 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad1 end\n");
|
||||
printf("size=%d offset=%d srcs=%d\n", size, offset, srcs);
|
||||
return -1;
|
||||
}
|
||||
|
||||
offset = udest_ptrs[1] - dest2;
|
||||
if (memcmp(dest2, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad2 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad2 end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
offset = udest_ptrs[2] - dest3;
|
||||
if (memcmp(dest3, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad3 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad3 end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
offset = udest_ptrs[3] - dest4;
|
||||
if (memcmp(dest4, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad4 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad4 end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test all size alignment
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 32;
|
||||
|
||||
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||
srcs = TEST_SOURCES;
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
g4[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (memcmp(dest_ref1, dest_ptrs[0], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[0], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref2, dest_ptrs[1], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[1], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref3, dest_ptrs[2], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[2], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref4, dest_ptrs[3], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[3], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
printf("Pass\n");
|
||||
return 0;
|
||||
|
||||
}
|
296
erasure/src/gf-4vect-dot-prod-sse.asm
Normal file
296
erasure/src/gf-4vect-dot-prod-sse.asm
Normal file
@ -0,0 +1,296 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_4vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
;;; elf64 prologue: push the four registers this file maps to
;;; tmp3..tmp6 (r13, r12, r14, r15) so FUNC_RESTORE can pop them back.
%macro FUNC_SAVE 0
	push	r12
	push	r13
	push	r14
	push	r15
%endmacro
|
||||
;;; elf64 epilogue: pop the registers pushed by FUNC_SAVE, in reverse order.
%macro FUNC_RESTORE 0
	pop	r15
	pop	r14
	pop	r13
	pop	r12
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define tmp5 rdi ; must be saved and restored
|
||||
%define tmp6 rsi ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 9*16 + 7*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
;;; win64 prologue: reserve stack_size bytes, store xmm6-xmm14 in nine
;;; 16-byte slots and r12-r15, rdi, rsi in the 8-byte slots above them, then
;;; load arg4 from the stack location given by arg(4).
;;; NOTE(review): alloc_stack/save_xmm128/save_reg/end_prolog are presumably
;;; the assembler's win64 unwind-info macros -- confirm against the toolchain.
%macro FUNC_SAVE 0
	alloc_stack	stack_size
	save_xmm128	xmm6, 0*16
	save_xmm128	xmm7, 1*16
	save_xmm128	xmm8, 2*16
	save_xmm128	xmm9, 3*16
	save_xmm128	xmm10, 4*16
	save_xmm128	xmm11, 5*16
	save_xmm128	xmm12, 6*16
	save_xmm128	xmm13, 7*16
	save_xmm128	xmm14, 8*16
	save_reg	r12, 9*16 + 0*8
	save_reg	r13, 9*16 + 1*8
	save_reg	r14, 9*16 + 2*8
	save_reg	r15, 9*16 + 3*8
	save_reg	rdi, 9*16 + 4*8
	save_reg	rsi, 9*16 + 5*8
	end_prolog
	mov	arg4, arg(4)
%endmacro
|
||||
|
||||
;;; win64 epilogue: reload xmm6-xmm14 and r12-r15, rdi, rsi from the slots
;;; written by FUNC_SAVE, then release the stack_size bytes of frame.
%macro FUNC_RESTORE 0
	movdqa	xmm6, [rsp + 0*16]
	movdqa	xmm7, [rsp + 1*16]
	movdqa	xmm8, [rsp + 2*16]
	movdqa	xmm9, [rsp + 3*16]
	movdqa	xmm10, [rsp + 4*16]
	movdqa	xmm11, [rsp + 5*16]
	movdqa	xmm12, [rsp + 6*16]
	movdqa	xmm13, [rsp + 7*16]
	movdqa	xmm14, [rsp + 8*16]
	mov	r12, [rsp + 9*16 + 0*8]
	mov	r13, [rsp + 9*16 + 1*8]
	mov	r14, [rsp + 9*16 + 2*8]
	mov	r15, [rsp + 9*16 + 3*8]
	mov	rdi, [rsp + 9*16 + 4*8]
	mov	rsi, [rsp + 9*16 + 5*8]
	add	rsp, stack_size
%endmacro
|
||||
%endif
|
||||
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest1 arg4
|
||||
%define ptr arg5
|
||||
%define vec_i tmp2
|
||||
%define dest2 tmp3
|
||||
%define dest3 tmp4
|
||||
%define dest4 tmp5
|
||||
%define vskip3 tmp6
|
||||
%define pos return
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR movdqu
|
||||
%define XSTR movdqu
|
||||
%else
|
||||
;;; Use Non-temporal load/store
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm14
|
||||
%define xgft1_lo xmm13
|
||||
%define xgft1_hi xmm12
|
||||
%define xgft2_lo xmm11
|
||||
%define xgft2_hi xmm10
|
||||
%define xgft3_lo xmm9
|
||||
%define xgft3_hi xmm8
|
||||
%define xgft4_lo xmm7
|
||||
%define xgft4_hi xmm6
|
||||
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%define xp3 xmm4
|
||||
%define xp4 xmm5
|
||||
|
||||
align 16
|
||||
global gf_4vect_dot_prod_sse:function
|
||||
;;; gf_4vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; For every 16-byte column of the vec source buffers, looks up each source
;;; byte's low and high nibble in four sets of 32-byte tables (pshufb), XORs
;;; the partial products together and stores one 16-byte result to each of
;;; the four destination buffers.
;;;
;;; Table layout read from g_tbls: 32 bytes per source; the sets for
;;; destinations 2, 3 and 4 start 32*vec, 64*vec and 96*vec bytes in.
;;;
;;; Returns 0 in rax on success, 1 when len < 16. When len is not a multiple
;;; of 16 the last 16 bytes are handled by one extra pass at offset len-16
;;; that overlaps the previous one.
func(gf_4vect_dot_prod_sse)
	FUNC_SAVE
	sub	len, 16			;len now holds the last valid 16-byte offset
	jl	.return_fail
	xor	pos, pos
	movdqa	xmask0f, [mask0f]	;Load mask of lower nibble in each byte
	mov	vskip3, vec
	imul	vskip3, 96		;Byte offset of the 4th table set
	sal	vec, LOG_PS		;vec *= PS. Make vec_i count by PS
	mov	dest2, [dest1+PS]	;Fetch the four output pointers from dests[]
	mov	dest3, [dest1+2*PS]
	mov	dest4, [dest1+3*PS]
	mov	dest1, [dest1]		;Last: this overwrites the dests[] base pointer


.loop16:
	pxor	xp1, xp1		;Clear the four accumulators
	pxor	xp2, xp2
	pxor	xp3, xp3
	pxor	xp4, xp4
	mov	tmp, mul_array
	xor	vec_i, vec_i

.next_vect:
	mov	ptr, [src+vec_i]

	movdqu	xgft1_lo, [tmp]			;Load array Ax{00}, Ax{01}, ..., Ax{0f}
	movdqu	xgft1_hi, [tmp+16]		; "     Ax{00}, Ax{10}, ..., Ax{f0}
	movdqu	xgft2_lo, [tmp+vec*(32/PS)]	;Load array Bx{00}, Bx{01}, ..., Bx{0f}
	movdqu	xgft2_hi, [tmp+vec*(32/PS)+16]	; "     Bx{00}, Bx{10}, ..., Bx{f0}
	movdqu	xgft3_lo, [tmp+vec*(64/PS)]	;Load array Cx{00}, Cx{01}, ..., Cx{0f}
	movdqu	xgft3_hi, [tmp+vec*(64/PS)+16]	; "     Cx{00}, Cx{10}, ..., Cx{f0}
	movdqu	xgft4_lo, [tmp+vskip3]		;Load array Dx{00}, Dx{01}, ..., Dx{0f}
	movdqu	xgft4_hi, [tmp+vskip3+16]	; "     Dx{00}, Dx{10}, ..., Dx{f0}

	XLDR	x0, [ptr+pos]		;Get next source vector
	add	tmp, 32			;Advance to the next source's tables
	add	vec_i, PS

	movdqa	xtmpa, x0		;Keep unshifted copy of src
	psraw	x0, 4			;Shift to put high nibble into bits 3-0
	pand	x0, xmask0f		;Mask high src nibble in bits 3-0
	pand	xtmpa, xmask0f		;Mask low src nibble in bits 3-0

	pshufb	xgft1_hi, x0		;Lookup mul table of high nibble
	pshufb	xgft1_lo, xtmpa		;Lookup mul table of low nibble
	pxor	xgft1_hi, xgft1_lo	;GF add high and low partials
	pxor	xp1, xgft1_hi		;xp1 += partial

	pshufb	xgft2_hi, x0		;Lookup mul table of high nibble
	pshufb	xgft2_lo, xtmpa		;Lookup mul table of low nibble
	pxor	xgft2_hi, xgft2_lo	;GF add high and low partials
	pxor	xp2, xgft2_hi		;xp2 += partial

	pshufb	xgft3_hi, x0		;Lookup mul table of high nibble
	pshufb	xgft3_lo, xtmpa		;Lookup mul table of low nibble
	pxor	xgft3_hi, xgft3_lo	;GF add high and low partials
	pxor	xp3, xgft3_hi		;xp3 += partial

	pshufb	xgft4_hi, x0		;Lookup mul table of high nibble
	pshufb	xgft4_lo, xtmpa		;Lookup mul table of low nibble
	pxor	xgft4_hi, xgft4_lo	;GF add high and low partials
	pxor	xp4, xgft4_hi		;xp4 += partial

	cmp	vec_i, vec
	jl	.next_vect

	XSTR	[dest1+pos], xp1
	XSTR	[dest2+pos], xp2
	XSTR	[dest3+pos], xp3
	XSTR	[dest4+pos], xp4

	add	pos, 16			;Loop on 16 bytes at a time
	cmp	pos, len
	jle	.loop16

	lea	tmp, [len + 16]		;tmp = original length
	cmp	pos, tmp
	je	.return_pass		;Done when pos landed exactly on the end

	;; Tail len
	mov	pos, len		;Overlapped offset length-16
	jmp	.loop16			;Do one more overlap pass

.return_pass:
	mov	return, 0
	FUNC_RESTORE
	ret

.return_fail:
	mov	return, 1
	FUNC_RESTORE
	ret

endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
;;; slversion name, a, b, c
;;; Export two global labels, <name>_slver and <name>_slver_<a><b><c>, both
;;; pointing at a 4-byte record: the word 0x<c> followed by the bytes 0x<b>
;;; and 0x<a>.
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
	dw 0x%4
	db 0x%3, 0x%2
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_4vect_dot_prod_sse, 00, 03, 0064
|
311
erasure/src/gf-5vect-dot-prod-avx.asm
Normal file
311
erasure/src/gf-5vect-dot-prod-avx.asm
Normal file
@ -0,0 +1,311 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_5vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
;;; FUNC_SAVE (elf64): push r12-r15 on entry. These registers back
;;; tmp3-tmp6 (r13, r12, r14, r15), which the %define list marks as
;;; "must be saved and restored".
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
;;; FUNC_RESTORE (elf64): pop r15, r14, r13, r12 -- the exact reverse of
;;; the push order used by FUNC_SAVE.
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define tmp5 rdi ; must be saved and restored
|
||||
%define tmp6 rsi ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
;;; FUNC_SAVE (win64): reserve stack_size bytes and spill the registers
;;; that this routine overwrites:
;;;   [rsp + 0*16 .. 9*16]         xmm6-xmm15 (16 bytes each)
;;;   [rsp + 10*16 + 0*8 .. 5*8]   r12, r13, r14, r15, rdi, rsi
;;; stack_size also contains a seventh 8-byte slot that is not written here.
;;; alloc_stack, save_xmm128, save_reg and end_prolog are not defined in the
;;; visible text; presumably they are the prologue/unwind helpers that go
;;; with proc_frame -- TODO confirm.
;;; After the prologue, arg4 (r12) is loaded from arg(4), i.e.
;;; [rsp + stack_size + PS + PS*4].
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_xmm128 xmm9, 3*16
|
||||
save_xmm128 xmm10, 4*16
|
||||
save_xmm128 xmm11, 5*16
|
||||
save_xmm128 xmm12, 6*16
|
||||
save_xmm128 xmm13, 7*16
|
||||
save_xmm128 xmm14, 8*16
|
||||
save_xmm128 xmm15, 9*16
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r14, 10*16 + 2*8
|
||||
save_reg r15, 10*16 + 3*8
|
||||
save_reg rdi, 10*16 + 4*8
|
||||
save_reg rsi, 10*16 + 5*8
|
||||
end_prolog
|
||||
mov arg4, arg(4) ;fetch the fifth argument (dest) from the stack
|
||||
%endmacro
|
||||
|
||||
;;; FUNC_RESTORE (win64): reload xmm6-xmm15 and r12, r13, r14, r15, rdi, rsi
;;; from the slots written by FUNC_SAVE, then release the stack_size bytes
;;; that FUNC_SAVE reserved.
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm8, [rsp + 2*16]
|
||||
vmovdqa xmm9, [rsp + 3*16]
|
||||
vmovdqa xmm10, [rsp + 4*16]
|
||||
vmovdqa xmm11, [rsp + 5*16]
|
||||
vmovdqa xmm12, [rsp + 6*16]
|
||||
vmovdqa xmm13, [rsp + 7*16]
|
||||
vmovdqa xmm14, [rsp + 8*16]
|
||||
vmovdqa xmm15, [rsp + 9*16]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r14, [rsp + 10*16 + 2*8]
|
||||
mov r15, [rsp + 10*16 + 3*8]
|
||||
mov rdi, [rsp + 10*16 + 4*8]
|
||||
mov rsi, [rsp + 10*16 + 5*8]
|
||||
add rsp, stack_size ;undo the allocation made in FUNC_SAVE
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest arg4
|
||||
%define ptr arg5
|
||||
%define vec_i tmp2
|
||||
%define dest1 tmp3
|
||||
%define dest2 tmp4
|
||||
%define vskip1 tmp5
|
||||
%define vskip3 tmp6
|
||||
%define pos return
|
||||
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use aligned load/store: plain (NO_NT_LDST) or non-temporal
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm15
|
||||
%define xgft1_lo xmm14
|
||||
%define xgft1_hi xmm13
|
||||
%define xgft2_lo xmm12
|
||||
%define xgft2_hi xmm11
|
||||
%define xgft3_lo xmm10
|
||||
%define xgft3_hi xmm9
|
||||
%define xgft4_lo xmm8
|
||||
%define xgft4_hi xmm7
|
||||
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%define xp3 xmm4
|
||||
%define xp4 xmm5
|
||||
%define xp5 xmm6
|
||||
|
||||
align 16
|
||||
global gf_5vect_dot_prod_avx:function
|
||||
;;; gf_5vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; For each 16-byte column of the source buffers, accumulates five GF
;;; dot products in xp1..xp5 and stores one 16-byte result through each of
;;; the five pointers read from dest[0..4].
;;;
;;;   len       - byte count; the routine returns 1 when len < 16
;;;   vec       - number of source pointers in src
;;;   mul_array - multiply tables, 32 bytes per (output, source) pair:
;;;               16-byte low-nibble table, then 16-byte high-nibble table.
;;;               The tables for output k start at mul_array + k*vec*32.
;;;   src       - array of source pointers
;;;   dest      - array of five destination pointers
;;; Returns 0 on success, 1 on the short-length failure.
;;;
;;; When len is not a multiple of 16, one extra pass is made at offset
;;; len-16, re-writing bytes that the previous pass already produced.
;;; NOTE(review): the inner loop tests vec only after its first iteration,
;;; so src[0] is read even when vec is 0 -- confirm callers never pass 0.
;;; NOTE(review): with EC_ALIGNED_ADDR the XLDR/XSTR moves are aligned forms;
;;; the overlapped tail offset len-16 is only 16-byte aligned when len is a
;;; multiple of 16 -- confirm callers guarantee that in this configuration.
func(gf_5vect_dot_prod_avx)
|
||||
FUNC_SAVE
|
||||
sub len, 16 ;len now holds the last valid 16-byte offset
|
||||
jl .return_fail ;fewer than 16 bytes requested
|
||||
xor pos, pos
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
mov vskip1, vec
|
||||
imul vskip1, 32 ;vskip1 = vec*32: byte stride between outputs' tables
|
||||
mov vskip3, vec
|
||||
imul vskip3, 96 ;vskip3 = vec*96 = 3*vskip1 (offset of the 4th output)
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
mov dest1, [dest]
|
||||
mov dest2, [dest+PS]
|
||||
|
||||
|
||||
.loop16:
|
||||
mov tmp, mul_array ;tmp walks the tables, 32 bytes per source
|
||||
xor vec_i, vec_i
|
||||
vpxor xp1, xp1 ;clear the five accumulators
|
||||
vpxor xp2, xp2
|
||||
vpxor xp3, xp3
|
||||
vpxor xp4, xp4
|
||||
vpxor xp5, xp5
|
||||
|
||||
|
||||
.next_vect:
|
||||
mov ptr, [src+vec_i]
|
||||
add vec_i, PS
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
vmovdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
vmovdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
vmovdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0 (drops bits shifted in)
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
;; xgft1_lo/hi are free again once xp1 is updated, so they are reused
|
||||
;; for the fifth (E) table pair.
vmovdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
vmovdqu xgft1_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||
add tmp, 32 ;advance to the next source's tables
|
||||
|
||||
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
vpxor xp4, xgft4_hi ;xp4 += partial
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp5, xgft1_hi ;xp5 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
;; tmp, ptr and vec_i are dead here and are reused to hold the 3rd, 4th
|
||||
;; and 5th destination pointers; .loop16 re-initialises tmp and vec_i.
mov tmp, [dest+2*PS]
|
||||
mov ptr, [dest+3*PS]
|
||||
mov vec_i, [dest+4*PS]
|
||||
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
XSTR [tmp+pos], xp3
|
||||
XSTR [ptr+pos], xp4
|
||||
XSTR [vec_i+pos], xp5
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
lea tmp, [len + 16] ;tmp = original length
|
||||
cmp pos, tmp
|
||||
je .return_pass ;length was a multiple of 16: done
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
;;; slversion <func>, <core>, <ver>, <snum>
;;; Emits a 4-byte version record at the current position and exports two
;;; global labels that both point at it:
;;;   <func>_slver  and  <func>_slver_<core><ver><snum>
;;; Record layout: one 16-bit word 0x<snum>, then the bytes 0x<ver>, 0x<core>.
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_5vect_dot_prod_avx, 02, 03, 0194
|
323
erasure/src/gf-5vect-dot-prod-avx2.asm
Normal file
323
erasure/src/gf-5vect-dot-prod-avx2.asm
Normal file
@ -0,0 +1,323 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_5vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
;;; FUNC_SAVE (elf64): push r12-r15 on entry. These registers back
;;; tmp3-tmp6 (r13, r12, r14, r15), which the %define list marks as
;;; "must be saved and restored".
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
;;; FUNC_RESTORE (elf64): pop r15, r14, r13, r12 -- the exact reverse of
;;; the push order used by FUNC_SAVE.
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define tmp5 rdi ; must be saved and restored
|
||||
%define tmp6 rsi ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
|
||||
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
;;; FUNC_SAVE (win64): reserve stack_size bytes and spill the registers
;;; that this routine overwrites:
;;;   [rsp + 0*16 .. 9*16]         xmm6-xmm15, stored with plain vmovdqa
;;;   [rsp + 10*16 + 0*8 .. 5*8]   r12, r13, r14, r15, rdi, rsi (save_reg)
;;; stack_size also contains a seventh 8-byte slot that is not written here.
;;; alloc_stack, save_reg and end_prolog are not defined in the visible
;;; text; presumably they are the prologue/unwind helpers that go with
;;; proc_frame -- TODO confirm.
;;; After the prologue, arg4 (r12) is loaded from arg(4), i.e.
;;; [rsp + stack_size + PS + PS*4].
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
vmovdqa [rsp + 0*16], xmm6
|
||||
vmovdqa [rsp + 1*16], xmm7
|
||||
vmovdqa [rsp + 2*16], xmm8
|
||||
vmovdqa [rsp + 3*16], xmm9
|
||||
vmovdqa [rsp + 4*16], xmm10
|
||||
vmovdqa [rsp + 5*16], xmm11
|
||||
vmovdqa [rsp + 6*16], xmm12
|
||||
vmovdqa [rsp + 7*16], xmm13
|
||||
vmovdqa [rsp + 8*16], xmm14
|
||||
vmovdqa [rsp + 9*16], xmm15
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r14, 10*16 + 2*8
|
||||
save_reg r15, 10*16 + 3*8
|
||||
save_reg rdi, 10*16 + 4*8
|
||||
save_reg rsi, 10*16 + 5*8
|
||||
end_prolog
|
||||
mov arg4, arg(4) ;fetch the fifth argument (dest) from the stack
|
||||
%endmacro
|
||||
|
||||
;;; FUNC_RESTORE (win64): reload xmm6-xmm15 and r12, r13, r14, r15, rdi, rsi
;;; from the slots written by FUNC_SAVE, then release the stack_size bytes
;;; that FUNC_SAVE reserved.
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm8, [rsp + 2*16]
|
||||
vmovdqa xmm9, [rsp + 3*16]
|
||||
vmovdqa xmm10, [rsp + 4*16]
|
||||
vmovdqa xmm11, [rsp + 5*16]
|
||||
vmovdqa xmm12, [rsp + 6*16]
|
||||
vmovdqa xmm13, [rsp + 7*16]
|
||||
vmovdqa xmm14, [rsp + 8*16]
|
||||
vmovdqa xmm15, [rsp + 9*16]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r14, [rsp + 10*16 + 2*8]
|
||||
mov r15, [rsp + 10*16 + 3*8]
|
||||
mov rdi, [rsp + 10*16 + 4*8]
|
||||
mov rsi, [rsp + 10*16 + 5*8]
|
||||
add rsp, stack_size ;undo the allocation made in FUNC_SAVE
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest arg4
|
||||
%define ptr arg5
|
||||
%define vec_i tmp2
|
||||
%define dest1 tmp3
|
||||
%define dest2 tmp4
|
||||
%define vskip1 tmp5
|
||||
%define vskip3 tmp6
|
||||
%define pos return
|
||||
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Use aligned load/store: plain (NO_NT_LDST) or non-temporal
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f ymm15
|
||||
%define xmask0fx xmm15
|
||||
%define xgft1_lo ymm14
|
||||
%define xgft1_hi ymm13
|
||||
%define xgft2_lo ymm12
|
||||
%define xgft2_hi ymm11
|
||||
%define xgft3_lo ymm10
|
||||
%define xgft3_hi ymm9
|
||||
%define xgft4_lo ymm8
|
||||
%define xgft4_hi ymm7
|
||||
|
||||
|
||||
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xp1 ymm2
|
||||
%define xp2 ymm3
|
||||
%define xp3 ymm4
|
||||
%define xp4 ymm5
|
||||
%define xp5 ymm6
|
||||
|
||||
align 16
|
||||
global gf_5vect_dot_prod_avx2:function
|
||||
;;; gf_5vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; For each 32-byte column of the source buffers, accumulates five GF
;;; dot products in xp1..xp5 and stores one 32-byte result through each of
;;; the five pointers read from dest[0..4].
;;;
;;;   len       - byte count; the routine returns 1 when len < 32
;;;   vec       - number of source pointers in src
;;;   mul_array - multiply tables, 32 bytes per (output, source) pair:
;;;               16-byte low-nibble table, then 16-byte high-nibble table.
;;;               The tables for output k start at mul_array + k*vec*32.
;;;   src       - array of source pointers
;;;   dest      - array of five destination pointers
;;; Returns 0 on success, 1 on the short-length failure.
;;;
;;; Each 32-byte table load puts the low-nibble table in the lower 128-bit
;;; lane and the high-nibble table in the upper lane, so the nibble indices
;;; are lane-swapped to match (see the vperm2i128 pairs below).
;;;
;;; When len is not a multiple of 32, one extra pass is made at offset
;;; len-32, re-writing bytes that the previous pass already produced.
;;; NOTE(review): the inner loop tests vec only after its first iteration,
;;; so src[0] is read even when vec is 0 -- confirm callers never pass 0.
;;; NOTE(review): with EC_ALIGNED_ADDR the XLDR/XSTR moves are aligned forms;
;;; the overlapped tail offset len-32 is only 32-byte aligned when len is a
;;; multiple of 32 -- confirm callers guarantee that in this configuration.
func(gf_5vect_dot_prod_avx2)
|
||||
FUNC_SAVE
|
||||
sub len, 32 ;len now holds the last valid 32-byte offset
|
||||
jl .return_fail ;fewer than 32 bytes requested
|
||||
xor pos, pos
|
||||
mov tmp.b, 0x0f
|
||||
vpinsrb xmask0fx, xmask0fx, tmp.w, 0 ;byte 0 of xmm15 = 0x0f
|
||||
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||
mov vskip1, vec
|
||||
imul vskip1, 32 ;vskip1 = vec*32: byte stride between outputs' tables
|
||||
mov vskip3, vec
|
||||
imul vskip3, 96 ;vskip3 = vec*96 = 3*vskip1 (offset of the 4th output)
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
mov dest1, [dest]
|
||||
mov dest2, [dest+PS]
|
||||
|
||||
|
||||
.loop32:
|
||||
mov tmp, mul_array ;tmp walks the tables, 32 bytes per source
|
||||
xor vec_i, vec_i
|
||||
vpxor xp1, xp1 ;clear the five accumulators
|
||||
vpxor xp2, xp2
|
||||
vpxor xp3, xp3
|
||||
vpxor xp4, xp4
|
||||
vpxor xp5, xp5
|
||||
|
||||
|
||||
.next_vect:
|
||||
mov ptr, [src+vec_i]
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
add vec_i, PS
|
||||
|
||||
;; xgft4_lo is used as scratch for the low nibbles until its table is
|
||||
;; loaded further down.
vpand xgft4_lo, x0, xmask0f ;Mask low src nibble in bits 3-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0 (drops bits shifted in)
|
||||
vperm2i128 xtmpa, xgft4_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
|
||||
vperm2i128 x0, xgft4_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
|
||||
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft4_hi, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
;; xgft1_lo/hi are free again once xp1 is updated, so they are reused
|
||||
;; for the fifth (E) table pair.
vmovdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||
add tmp, 32 ;advance to the next source's tables
|
||||
|
||||
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
vpxor xp4, xgft4_hi ;xp4 += partial
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp5, xgft1_hi ;xp5 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
;; tmp, ptr and vec_i are dead here and are reused to hold the 3rd, 4th
|
||||
;; and 5th destination pointers; .loop32 re-initialises tmp and vec_i.
mov tmp, [dest+2*PS]
|
||||
mov ptr, [dest+3*PS]
|
||||
mov vec_i, [dest+4*PS]
|
||||
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
XSTR [tmp+pos], xp3
|
||||
XSTR [ptr+pos], xp4
|
||||
XSTR [vec_i+pos], xp5
|
||||
|
||||
add pos, 32 ;Loop on 32 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop32
|
||||
|
||||
lea tmp, [len + 32] ;tmp = original length
|
||||
cmp pos, tmp
|
||||
je .return_pass ;length was a multiple of 32: done
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-32
|
||||
jmp .loop32 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
;;; slversion <func>, <core>, <ver>, <snum>
;;; Emits a 4-byte version record at the current position and exports two
;;; global labels that both point at it:
;;;   <func>_slver  and  <func>_slver_<core><ver><snum>
;;; Record layout: one 16-bit word 0x<snum>, then the bytes 0x<ver>, 0x<core>.
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_5vect_dot_prod_avx2, 04, 03, 0199
|
319
erasure/src/gf-5vect-dot-prod-sse-perf.c
Normal file
319
erasure/src/gf-5vect-dot-prod-sse-perf.c
Normal file
@ -0,0 +1,319 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/tests.h"
|
||||
|
||||
#ifndef FUNCTION_UNDER_TEST
|
||||
# define FUNCTION_UNDER_TEST gf_5vect_dot_prod_sse
|
||||
#endif
|
||||
|
||||
#define str(s) #s
|
||||
#define xstr(s) str(s)
|
||||
|
||||
//#define CACHED_TEST
|
||||
#ifdef CACHED_TEST
|
||||
// Cached test, loop many times over small dataset
|
||||
# define TEST_SOURCES 10
|
||||
# define TEST_LEN 8*1024
|
||||
# define TEST_LOOPS 40000
|
||||
# define TEST_TYPE_STR "_warm"
|
||||
#else
|
||||
# ifndef TEST_CUSTOM
|
||||
// Uncached test. Pull from large mem base.
|
||||
# define TEST_SOURCES 10
|
||||
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
|
||||
# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
|
||||
# define TEST_LOOPS 100
|
||||
# define TEST_TYPE_STR "_cold"
|
||||
# else
|
||||
# define TEST_TYPE_STR "_cus"
|
||||
# ifndef TEST_LOOPS
|
||||
# define TEST_LOOPS 1000
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
/*
 * Print the first len bytes of buf to stdout as hex (" %2x" each),
 * breaking the line after every 32nd byte, then emit a final newline.
 */
void dump(unsigned char *buf, int len)
{
	int printed = 0;

	while (printed < len) {
		unsigned int value = buf[printed] & 0xff;

		printed++;
		printf(" %2x", value);
		if ((printed % 32) == 0) {
			printf("\n");
		}
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Print a k-row by m-column byte matrix to stdout, one row per line,
 * each element as " %2x", followed by one extra newline.
 * s[row] is the pointer to the start of that row.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j;
|
||||
void *buf;
|
||||
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
|
||||
u8 g4[TEST_SOURCES], g5[TEST_SOURCES], *g_tbls, *buffs[TEST_SOURCES];
|
||||
u8 *dest1, *dest2, *dest3, *dest4, *dest5, *dest_ref1, *dest_ref2;
|
||||
u8 *dest_ref3, *dest_ref4, *dest_ref5, *dest_ptrs[5];
|
||||
struct perf start, stop;
|
||||
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 16, 6 * TEST_SOURCES * 32)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
g_tbls = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest2 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest3 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest4 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest5 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref2 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref3 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref4 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref5 = buf;
|
||||
|
||||
dest_ptrs[0] = dest1;
|
||||
dest_ptrs[1] = dest2;
|
||||
dest_ptrs[2] = dest3;
|
||||
dest_ptrs[3] = dest4;
|
||||
dest_ptrs[4] = dest5;
|
||||
|
||||
// Performance test
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
memset(dest1, 0, TEST_LEN);
|
||||
memset(dest2, 0, TEST_LEN);
|
||||
memset(dest3, 0, TEST_LEN);
|
||||
memset(dest4, 0, TEST_LEN);
|
||||
memset(dest5, 0, TEST_LEN);
|
||||
memset(dest_ref1, 0, TEST_LEN);
|
||||
memset(dest_ref2, 0, TEST_LEN);
|
||||
memset(dest_ref3, 0, TEST_LEN);
|
||||
memset(dest_ref4, 0, TEST_LEN);
|
||||
memset(dest_ref5, 0, TEST_LEN);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
g4[i] = rand();
|
||||
g5[i] = rand();
|
||||
}
|
||||
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||
dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
|
||||
dest_ref3);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs,
|
||||
dest_ref4);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs,
|
||||
dest_ref5);
|
||||
|
||||
#ifdef DO_REF_PERF
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS / 20; i++) {
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||
buffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||
buffs, dest_ref3);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
|
||||
buffs, dest_ref4);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES],
|
||||
buffs, dest_ref5);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf("gf_5vect_dot_prod_base" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 5) * i);
|
||||
#endif
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS; i++) {
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]);
|
||||
}
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 5) * i);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test3\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test4\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest4, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test5\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref5, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest5, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("pass perf check\n");
|
||||
return 0;
|
||||
|
||||
}
|
805
erasure/src/gf-5vect-dot-prod-sse-test.c
Normal file
805
erasure/src/gf-5vect-dot-prod-sse-test.c
Normal file
@ -0,0 +1,805 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/types.h"
|
||||
|
||||
// Routine exercised by this test. May be overridden on the compiler command
// line to run the same test body against another implementation.
#ifndef FUNCTION_UNDER_TEST
# define FUNCTION_UNDER_TEST gf_5vect_dot_prod_sse
#endif

// Smallest length (in bytes) handed to the routine by the length sweeps, and
// the granule the unaligned-pointer test rounds its length down to.
#ifndef TEST_MIN_SIZE
# define TEST_MIN_SIZE 16
#endif

// Two-level stringification: xstr(X) expands X first, then quotes the result,
// so xstr(FUNCTION_UNDER_TEST) yields the routine's name as a string literal.
#define str(s) #s
#define xstr(s) str(s)

#define TEST_LEN 8192		// bytes allocated for every source/dest buffer
#define TEST_SIZE (TEST_LEN/2)	// largest length used by the end-of-buffer sweep
#define TEST_MEM TEST_SIZE	// NOTE(review): appears unused in this test
#define TEST_LOOPS 20000	// NOTE(review): appears unused in this test
#define TEST_TYPE_STR ""	// NOTE(review): appears unused in this test

// Number of source buffers (and coefficients per destination).
#ifndef TEST_SOURCES
# define TEST_SOURCES 16
#endif

// Number of iterations of each randomized test.
#ifndef RANDOMS
# define RANDOMS 20
#endif

#ifdef EC_ALIGNED_ADDR
// Define power of 2 range to check ptr, len alignment
# define PTR_ALIGN_CHK_B 0
# define LEN_ALIGN_CHK_B 0	// 0 for aligned only
#else
// Define power of 2 range to check ptr, len alignment
# define PTR_ALIGN_CHK_B 32
# define LEN_ALIGN_CHK_B 32	// 0 for aligned only
#endif

// Byte type used for all GF(2^8) data, coefficients and tables in this test.
typedef unsigned char u8;
|
||||
|
||||
/*
 * Print the first `len` bytes of `buf` to stdout as hex.
 *
 * Each byte is written as a space followed by a width-2 hex field (" %2x").
 * A newline is emitted after every 32nd byte, and one more newline always
 * terminates the dump (so a length that is a multiple of 32 ends with two).
 * Nothing but the final newline is printed when len <= 0.
 */
void dump(unsigned char *buf, int len)
{
	int i;

	for (i = 0; i < len; i++) {
		printf(" %2x", 0xff & buf[i]);
		// Break the line after every 32 bytes printed.
		if ((i + 1) % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Print a k x m window of an array of row pointers to stdout as hex.
 *
 * For each of the first `k` pointers in `s`, the first `m` bytes it points to
 * are printed on one line (each as " %2x"). A blank line follows the matrix.
 * Every s[0..k-1] must therefore point to at least `m` readable bytes.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		for (col = 0; col < m; col++)
			printf(" %2x", s[row][col]);
		printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Print a k x m matrix stored contiguously in row-major order to stdout.
 *
 * Element (i, j) is read from s[j + i * m]; each row goes on its own line
 * (bytes as " %2x") and a blank line follows the matrix. `s` must hold at
 * least k * m bytes.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		for (col = 0; col < m; col++)
			printf(" %2x", 0xff & s[col + (row * m)]);
		printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, rtest, srcs;
|
||||
void *buf;
|
||||
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
|
||||
u8 g4[TEST_SOURCES], g5[TEST_SOURCES], *g_tbls;
|
||||
u8 *dest1, *dest2, *dest3, *dest4, *dest5, *buffs[TEST_SOURCES];
|
||||
u8 *dest_ref1, *dest_ref2, *dest_ref3, *dest_ref4, *dest_ref5;
|
||||
u8 *dest_ptrs[5];
|
||||
|
||||
int align, size;
|
||||
unsigned char *efence_buffs[TEST_SOURCES];
|
||||
unsigned int offset;
|
||||
u8 *ubuffs[TEST_SOURCES];
|
||||
u8 *udest_ptrs[5];
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 16, 2 * (6 * TEST_SOURCES * 32))) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
g_tbls = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest2 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest3 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest4 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest5 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref2 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref3 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref4 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref5 = buf;
|
||||
|
||||
dest_ptrs[0] = dest1;
|
||||
dest_ptrs[1] = dest2;
|
||||
dest_ptrs[2] = dest3;
|
||||
dest_ptrs[3] = dest4;
|
||||
dest_ptrs[4] = dest5;
|
||||
|
||||
// Test of all zeros
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
|
||||
memset(dest1, 0, TEST_LEN);
|
||||
memset(dest2, 0, TEST_LEN);
|
||||
memset(dest3, 0, TEST_LEN);
|
||||
memset(dest4, 0, TEST_LEN);
|
||||
memset(dest5, 0, TEST_LEN);
|
||||
memset(dest_ref1, 0, TEST_LEN);
|
||||
memset(dest_ref2, 0, TEST_LEN);
|
||||
memset(dest_ref3, 0, TEST_LEN);
|
||||
memset(dest_ref4, 0, TEST_LEN);
|
||||
memset(dest_ref5, 0, TEST_LEN);
|
||||
memset(g1, 2, TEST_SOURCES);
|
||||
memset(g2, 1, TEST_SOURCES);
|
||||
memset(g3, 7, TEST_SOURCES);
|
||||
memset(g4, 9, TEST_SOURCES);
|
||||
memset(g5, 4, TEST_SOURCES);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]);
|
||||
gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]);
|
||||
gf_vect_mul_init(g5[i], &g_tbls[128 * TEST_SOURCES + i * 32]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||
dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
|
||||
dest_ref3);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs,
|
||||
dest_ref4);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs,
|
||||
dest_ref5);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest4, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test5\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref5, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest5, 25);
|
||||
return -1;
|
||||
}
|
||||
putchar('.');
|
||||
|
||||
// Rand data test
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
g4[i] = rand();
|
||||
g5[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||
buffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||
buffs, dest_ref3);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
|
||||
buffs, dest_ref4);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES],
|
||||
buffs, dest_ref5);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest4, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref5, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest5, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Rand data test with varied parameters
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
g4[i] = rand();
|
||||
g5[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs,
|
||||
dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs,
|
||||
dest_ref3);
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs,
|
||||
dest_ref4);
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[128 * srcs], buffs,
|
||||
dest_ref5);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test1 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test2 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test3 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test4 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest4, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test5 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref5, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest5, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
g4[i] = rand();
|
||||
g5[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||
efence_buffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||
efence_buffs, dest_ref3);
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
|
||||
efence_buffs, dest_ref4);
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES],
|
||||
efence_buffs, dest_ref5);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 != memcmp(dest_ref2, dest2, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 != memcmp(dest_ref3, dest3, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 != memcmp(dest_ref4, dest4, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest4, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 != memcmp(dest_ref5, dest5, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref5, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest5, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test rand ptr alignment if available
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||
srcs = rand() % TEST_SOURCES;
|
||||
if (srcs == 0)
|
||||
continue;
|
||||
|
||||
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||
// Add random offsets
|
||||
for (i = 0; i < srcs; i++)
|
||||
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
udest_ptrs[4] = dest5 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
memset(dest1, 0, TEST_LEN); // zero pad to check write-over
|
||||
memset(dest2, 0, TEST_LEN);
|
||||
memset(dest3, 0, TEST_LEN);
|
||||
memset(dest4, 0, TEST_LEN);
|
||||
memset(dest5, 0, TEST_LEN);
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
ubuffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
g4[i] = rand();
|
||||
g5[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], ubuffs, dest_ref5);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs);
|
||||
|
||||
if (memcmp(dest_ref1, udest_ptrs[0], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[0], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref2, udest_ptrs[1], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[1], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref3, udest_ptrs[2], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[2], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref4, udest_ptrs[3], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[3], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref5, udest_ptrs[4], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref5, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[4], 25);
|
||||
return -1;
|
||||
}
|
||||
// Confirm that padding around dests is unchanged
|
||||
memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||
offset = udest_ptrs[0] - dest1;
|
||||
|
||||
if (memcmp(dest1, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad1 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad1 end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
offset = udest_ptrs[1] - dest2;
|
||||
if (memcmp(dest2, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad2 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad2 end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
offset = udest_ptrs[2] - dest3;
|
||||
if (memcmp(dest3, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad3 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad3 end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
offset = udest_ptrs[3] - dest4;
|
||||
if (memcmp(dest4, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad4 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad4 end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
offset = udest_ptrs[4] - dest5;
|
||||
if (memcmp(dest5, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad5 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest5 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad5 end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test all size alignment
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
|
||||
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||
srcs = TEST_SOURCES;
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
g4[i] = rand();
|
||||
g5[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], buffs, dest_ref5);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (memcmp(dest_ref1, dest_ptrs[0], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[0], 25);
|
||||
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref2, dest_ptrs[1], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[1], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref3, dest_ptrs[2], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[2], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref4, dest_ptrs[3], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[3], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref5, dest_ptrs[4], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref5, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[4], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
printf("Pass\n");
|
||||
return 0;
|
||||
|
||||
}
|
312
erasure/src/gf-5vect-dot-prod-sse.asm
Normal file
312
erasure/src/gf-5vect-dot-prod-sse.asm
Normal file
@ -0,0 +1,312 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_5vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
;;; ELF64 build: bind the abstract names used by the routine (arg0..arg5,
;;; tmp..tmp6, return) to registers and define the prologue/epilogue macros.
;;; arg0..arg5 follow the System V AMD64 integer-argument order
;;; (rdi, rsi, rdx, rcx, r8, r9). r12-r15 are callee-saved, so the scratch
;;; names mapped onto them are pushed in FUNC_SAVE and popped in FUNC_RESTORE.
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
;;; PS = pointer size in bytes, LOG_PS = log2(PS) (used as a shift count)
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
;;; On ELF the function entry is just a plain label
%define func(x) x:
|
||||
;;; Prologue: preserve the four callee-saved registers used as scratch
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
;;; Epilogue: pop in the reverse order of FUNC_SAVE
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
;;; Win64 build: bind the abstract names used by the routine to registers and
;;; define the prologue/epilogue macros.
;;; Only the first four arguments arrive in registers (rcx, rdx, r8, r9); the
;;; fifth is fetched from the caller's stack by FUNC_SAVE via arg(4).
;;; Frame layout created by FUNC_SAVE (offsets from rsp after alloc_stack):
;;;   0*16 .. 9*16            xmm6..xmm15
;;;   10*16 + 0*8 .. + 5*8    r12, r13, r14, r15, rdi, rsi
;;; stack_size reserves 7 eight-byte slots after the XMM area although only six
;;; are written.
;;; NOTE(review): proc_frame, alloc_stack, save_xmm128, save_reg and end_prolog
;;; are not defined in this file; presumably they are the assembler's Win64
;;; unwind-info helper macros -- confirm against the assembler in use.
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define tmp5 rdi ; must be saved and restored
|
||||
%define tmp6 rsi ; must be saved and restored
|
||||
%define return rax
|
||||
;;; PS = pointer size in bytes, LOG_PS = log2(PS) (used as a shift count)
%define PS 8
|
||||
%define LOG_PS 3
|
||||
;;; 10*16 + 7*8 = 216 bytes. Presumably an odd multiple of 8 is required so
;;; that rsp is 16-byte aligned for the movdqa accesses below once the return
;;; address is accounted for -- confirm.
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
|
||||
;;; Address of stack argument x: skip this frame (stack_size) and the return
;;; address (PS), then index by PS*x
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
;;; Prologue: allocate the frame, save xmm6-xmm15 and six GPRs, then load the
;;; fifth argument from the stack into arg4 (r12)
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_xmm128 xmm9, 3*16
|
||||
save_xmm128 xmm10, 4*16
|
||||
save_xmm128 xmm11, 5*16
|
||||
save_xmm128 xmm12, 6*16
|
||||
save_xmm128 xmm13, 7*16
|
||||
save_xmm128 xmm14, 8*16
|
||||
save_xmm128 xmm15, 9*16
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r14, 10*16 + 2*8
|
||||
save_reg r15, 10*16 + 3*8
|
||||
save_reg rdi, 10*16 + 4*8
|
||||
save_reg rsi, 10*16 + 5*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
;;; Epilogue: reload everything FUNC_SAVE stored (same offsets) and release
;;; the frame
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp + 0*16]
|
||||
movdqa xmm7, [rsp + 1*16]
|
||||
movdqa xmm8, [rsp + 2*16]
|
||||
movdqa xmm9, [rsp + 3*16]
|
||||
movdqa xmm10, [rsp + 4*16]
|
||||
movdqa xmm11, [rsp + 5*16]
|
||||
movdqa xmm12, [rsp + 6*16]
|
||||
movdqa xmm13, [rsp + 7*16]
|
||||
movdqa xmm14, [rsp + 8*16]
|
||||
movdqa xmm15, [rsp + 9*16]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r14, [rsp + 10*16 + 2*8]
|
||||
mov r15, [rsp + 10*16 + 3*8]
|
||||
mov rdi, [rsp + 10*16 + 4*8]
|
||||
mov rsi, [rsp + 10*16 + 5*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest arg4
|
||||
%define ptr arg5
|
||||
%define vec_i tmp2
|
||||
%define dest1 tmp3
|
||||
%define dest2 tmp4
|
||||
%define vskip1 tmp5
|
||||
%define vskip3 tmp6
|
||||
%define pos return
|
||||
|
||||
|
||||
;;; Select the instructions used for source loads (XLDR) and destination
;;; stores (XSTR):
;;;   EC_ALIGNED_ADDR undefined          -> movdqu   / movdqu  (unaligned)
;;;   EC_ALIGNED_ADDR + NO_NT_LDST       -> movdqa   / movdqa  (aligned)
;;;   EC_ALIGNED_ADDR without NO_NT_LDST -> movntdqa / movntdq (non-temporal)
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use unaligned load/store
|
||||
%define XLDR movdqu
|
||||
%define XSTR movdqu
|
||||
%else
|
||||
;;; Addresses are aligned: use non-temporal load/store unless NO_NT_LDST is defined
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm15
|
||||
%define xgft1_lo xmm14
|
||||
%define xgft1_hi xmm13
|
||||
%define xgft2_lo xmm12
|
||||
%define xgft2_hi xmm11
|
||||
%define xgft3_lo xmm10
|
||||
%define xgft3_hi xmm9
|
||||
%define xgft4_lo xmm8
|
||||
%define xgft4_hi xmm7
|
||||
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%define xp3 xmm4
|
||||
%define xp4 xmm5
|
||||
%define xp5 xmm6
|
||||
|
||||
;;; gf_5vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; Produces five destination buffers in one pass. For every 16-byte chunk and
;;; every source buffer, each source byte is split into its low and high
;;; nibble, each nibble indexes a 16-byte table with pshufb, the two lookups
;;; are XORed together and the result is XOR-accumulated into one accumulator
;;; per destination (xp1..xp5).
;;;
;;; Register aliases (defined earlier in this file):
;;;   len       - byte count; reduced by 16 on entry so it holds the offset of
;;;               the last full 16-byte chunk
;;;   vec       - number of sources; scaled by PS on entry so it can be
;;;               compared against the byte offset vec_i
;;;   mul_array - tables, 32 bytes per (destination, source) pair: 16-byte
;;;               low-nibble table then 16-byte high-nibble table. The table
;;;               sets of consecutive destinations are vec*32 bytes apart
;;;               (vskip1 = vec*32, vskip3 = vec*96)
;;;   src       - array of source pointers
;;;   dest      - array of five destination pointers
;;;   pos       - current byte offset (shares rax with the return value)
;;;
;;; Returns 0 on success, 1 if len < 16.
;;; NOTE(review): the .next_vect loop is bottom-tested, so it runs once even
;;; when vec is 0; callers presumably always pass vec >= 1 -- confirm.
align 16
|
||||
global gf_5vect_dot_prod_sse:function
|
||||
func(gf_5vect_dot_prod_sse)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
mov vskip1, vec
|
||||
imul vskip1, 32
|
||||
mov vskip3, vec
|
||||
imul vskip3, 96
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
mov dest1, [dest]
|
||||
mov dest2, [dest+PS]
|
||||
|
||||
|
||||
;;; Outer loop: one 16-byte chunk of every destination per iteration
.loop16:
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
pxor xp1, xp1
|
||||
pxor xp2, xp2
|
||||
pxor xp3, xp3
|
||||
pxor xp4, xp4
|
||||
pxor xp5, xp5
|
||||
|
||||
|
||||
;;; Inner loop: fold one source buffer into all five accumulators
.next_vect:
|
||||
mov ptr, [src+vec_i]
|
||||
add vec_i, PS
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
|
||||
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
movdqu xgft1_hi, [tmp+16] ;Load array Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
movdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
movdqu xgft2_hi, [tmp+vskip1*1+16] ;Load array Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
movdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
movdqu xgft3_hi, [tmp+vskip1*2+16] ;Load array Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
movdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
movdqu xgft4_hi, [tmp+vskip3+16] ;Load array Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 3-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 3-0 (also clears bits shifted in by the word shift)
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 3-0
|
||||
|
||||
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
pxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
pxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
;;; xgft1_* are free again (already folded into xp1): reuse them for the
;;; fifth table set, then step tmp to the next source's 32-byte tables
movdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
movdqu xgft1_hi, [tmp+vskip1*4+16] ;Load array Ex{00}, Ex{10}, ..., Ex{f0}
|
||||
add tmp, 32
|
||||
|
||||
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
pxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
pshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||
pxor xp4, xgft4_hi ;xp4 += partial
|
||||
|
||||
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
pxor xp5, xgft1_hi ;xp5 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
;;; tmp, ptr and vec_i are no longer needed for this chunk, so they are
;;; reused to hold destination pointers 3, 4 and 5
mov tmp, [dest+2*PS]
|
||||
mov ptr, [dest+3*PS]
|
||||
mov vec_i, [dest+4*PS]
|
||||
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
XSTR [tmp+pos], xp3
|
||||
XSTR [ptr+pos], xp4
|
||||
XSTR [vec_i+pos], xp5
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
;;; len + 16 is the original byte count; reaching it exactly means the
;;; whole length has been processed
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len: length is not a multiple of 16, redo the final 16 bytes
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
;;; slversion <func>, <core>, <ver>, <snum>
;;; Emits a 4-byte version record and exports two global labels for it:
;;; <func>_slver and <func>_slver_<core><ver><snum>. The record is the 16-bit
;;; word 0x<snum> followed by the bytes 0x<ver> and 0x<core>.
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_5vect_dot_prod_sse, 00, 03, 0065
|
323
erasure/src/gf-6vect-dot-prod-avx.asm
Normal file
323
erasure/src/gf-6vect-dot-prod-avx.asm
Normal file
@ -0,0 +1,323 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_6vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
;;; ELF64 build: bind the abstract names used by the routine (arg0..arg5,
;;; tmp..tmp6, return) to registers and define the prologue/epilogue macros.
;;; arg0..arg5 follow the System V AMD64 integer-argument order
;;; (rdi, rsi, rdx, rcx, r8, r9). r12-r15 are callee-saved, so the scratch
;;; names mapped onto them are pushed in FUNC_SAVE and popped in FUNC_RESTORE.
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
;;; PS = pointer size in bytes, LOG_PS = log2(PS) (used as a shift count)
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
;;; On ELF the function entry is just a plain label
%define func(x) x:
|
||||
;;; Prologue: preserve the four callee-saved registers used as scratch
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
;;; Epilogue: pop in the reverse order of FUNC_SAVE
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
;;; Win64 build: bind the abstract names used by the routine to registers and
;;; define the prologue/epilogue macros.
;;; Only the first four arguments arrive in registers (rcx, rdx, r8, r9); the
;;; fifth is fetched from the caller's stack by FUNC_SAVE via arg(4).
;;; Frame layout created by FUNC_SAVE (offsets from rsp after alloc_stack):
;;;   0*16 .. 9*16            xmm6..xmm15
;;;   10*16 + 0*8 .. + 5*8    r12, r13, r14, r15, rdi, rsi
;;; stack_size reserves 7 eight-byte slots after the XMM area although only six
;;; are written.
;;; NOTE(review): proc_frame, alloc_stack, save_xmm128, save_reg and end_prolog
;;; are not defined in this file; presumably they are the assembler's Win64
;;; unwind-info helper macros -- confirm against the assembler in use.
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define tmp5 rdi ; must be saved and restored
|
||||
%define tmp6 rsi ; must be saved and restored
|
||||
%define return rax
|
||||
;;; PS = pointer size in bytes, LOG_PS = log2(PS) (used as a shift count)
%define PS 8
|
||||
%define LOG_PS 3
|
||||
;;; 10*16 + 7*8 = 216 bytes. Presumably an odd multiple of 8 is required so
;;; that rsp is 16-byte aligned for the aligned XMM accesses below once the
;;; return address is accounted for -- confirm.
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
|
||||
;;; Address of stack argument x: skip this frame (stack_size) and the return
;;; address (PS), then index by PS*x
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
;;; Prologue: allocate the frame, save xmm6-xmm15 and six GPRs, then load the
;;; fifth argument from the stack into arg4 (r12)
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_xmm128 xmm9, 3*16
|
||||
save_xmm128 xmm10, 4*16
|
||||
save_xmm128 xmm11, 5*16
|
||||
save_xmm128 xmm12, 6*16
|
||||
save_xmm128 xmm13, 7*16
|
||||
save_xmm128 xmm14, 8*16
|
||||
save_xmm128 xmm15, 9*16
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r14, 10*16 + 2*8
|
||||
save_reg r15, 10*16 + 3*8
|
||||
save_reg rdi, 10*16 + 4*8
|
||||
save_reg rsi, 10*16 + 5*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
;;; Epilogue: reload everything FUNC_SAVE stored (same offsets) and release
;;; the frame
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm8, [rsp + 2*16]
|
||||
vmovdqa xmm9, [rsp + 3*16]
|
||||
vmovdqa xmm10, [rsp + 4*16]
|
||||
vmovdqa xmm11, [rsp + 5*16]
|
||||
vmovdqa xmm12, [rsp + 6*16]
|
||||
vmovdqa xmm13, [rsp + 7*16]
|
||||
vmovdqa xmm14, [rsp + 8*16]
|
||||
vmovdqa xmm15, [rsp + 9*16]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r14, [rsp + 10*16 + 2*8]
|
||||
mov r15, [rsp + 10*16 + 3*8]
|
||||
mov rdi, [rsp + 10*16 + 4*8]
|
||||
mov rsi, [rsp + 10*16 + 5*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest arg4
|
||||
%define ptr arg5
|
||||
%define vec_i tmp2
|
||||
%define dest1 tmp3
|
||||
%define dest2 tmp4
|
||||
%define vskip1 tmp5
|
||||
%define vskip3 tmp6
|
||||
%define pos return
|
||||
|
||||
|
||||
;;; Select the instructions used for source loads (XLDR) and destination
;;; stores (XSTR):
;;;   EC_ALIGNED_ADDR undefined          -> vmovdqu   / vmovdqu  (unaligned)
;;;   EC_ALIGNED_ADDR + NO_NT_LDST       -> vmovdqa   / vmovdqa  (aligned)
;;;   EC_ALIGNED_ADDR without NO_NT_LDST -> vmovntdqa / vmovntdq (non-temporal)
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use unaligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Addresses are aligned: use non-temporal load/store unless NO_NT_LDST is defined
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm15
|
||||
%define xgft1_lo xmm14
|
||||
%define xgft1_hi xmm13
|
||||
%define xgft2_lo xmm12
|
||||
%define xgft2_hi xmm11
|
||||
%define xgft3_lo xmm10
|
||||
%define xgft3_hi xmm9
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%define xp3 xmm4
|
||||
%define xp4 xmm5
|
||||
%define xp5 xmm6
|
||||
%define xp6 xmm7
|
||||
|
||||
;;; gf_6vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; Produces six destination buffers in one pass. For every 16-byte chunk and
;;; every source buffer, each source byte is split into its low and high
;;; nibble, each nibble indexes a 16-byte table with vpshufb, the two lookups
;;; are XORed together and the result is XOR-accumulated into one accumulator
;;; per destination (xp1..xp6). Only three table register pairs exist, so the
;;; six destinations are handled as two groups of three.
;;;
;;; Register aliases (defined earlier in this file):
;;;   len       - byte count; reduced by 16 on entry so it holds the offset of
;;;               the last full 16-byte chunk
;;;   vec       - number of sources; scaled by PS on entry so it can be
;;;               compared against the byte offset vec_i
;;;   mul_array - tables, 32 bytes per (destination, source) pair: 16-byte
;;;               low-nibble table then 16-byte high-nibble table. The table
;;;               sets of consecutive destinations are vec*32 bytes apart
;;;               (vskip1 = vec*32, vskip3 = vec*96)
;;;   src       - array of source pointers
;;;   dest      - array of six destination pointers
;;;   pos       - current byte offset (shares rax with the return value)
;;;
;;; Returns 0 on success, 1 if len < 16.
;;; NOTE(review): the .next_vect loop is bottom-tested, so it runs once even
;;; when vec is 0; callers presumably always pass vec >= 1 -- confirm.
align 16
|
||||
global gf_6vect_dot_prod_avx:function
|
||||
func(gf_6vect_dot_prod_avx)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
mov vskip1, vec
|
||||
imul vskip1, 32
|
||||
mov vskip3, vec
|
||||
imul vskip3, 96
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
mov dest1, [dest]
|
||||
mov dest2, [dest+PS]
|
||||
|
||||
|
||||
;;; Outer loop: one 16-byte chunk of every destination per iteration
.loop16:
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
vpxor xp1, xp1
|
||||
vpxor xp2, xp2
|
||||
vpxor xp3, xp3
|
||||
vpxor xp4, xp4
|
||||
vpxor xp5, xp5
|
||||
vpxor xp6, xp6
|
||||
|
||||
;;; Inner loop: fold one source buffer into all six accumulators
.next_vect:
|
||||
mov ptr, [src+vec_i]
|
||||
add vec_i, PS
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
vmovdqu xgft1_hi, [tmp+16] ;Load array Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
vmovdqu xgft2_hi, [tmp+vskip1*1+16] ;Load array Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
vmovdqu xgft3_hi, [tmp+vskip1*2+16] ;Load array Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5 (source pointer already consumed, so ptr is free)
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0 (also clears bits shifted in by the word shift)
|
||||
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
|
||||
;;; Second group: reuse the three table register pairs for destinations 4-6,
;;; then step tmp to the next source's 32-byte tables
vmovdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
vmovdqu xgft1_hi, [tmp+vskip3+16] ;Load array Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
vmovdqu xgft2_hi, [tmp+vskip1*4+16] ;Load array Ex{00}, Ex{10}, ..., Ex{f0}
|
||||
vmovdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
|
||||
vmovdqu xgft3_hi, [tmp+ptr+16] ;Load array Fx{00}, Fx{10}, ..., Fx{f0}
|
||||
add tmp, 32
|
||||
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp4, xgft1_hi ;xp4 += partial
|
||||
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp5, xgft2_hi ;xp5 += partial
|
||||
|
||||
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpxor xp6, xgft3_hi ;xp6 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
|
||||
;;; tmp, ptr and vec_i are no longer needed for this chunk, so they are
;;; reused to hold destination pointers 3, 4 and 5; tmp is reloaded with
;;; destination pointer 6 once xp3 has been stored
mov tmp, [dest+2*PS]
|
||||
mov ptr, [dest+3*PS]
|
||||
mov vec_i, [dest+4*PS]
|
||||
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
XSTR [tmp+pos], xp3
|
||||
mov tmp, [dest+5*PS]
|
||||
XSTR [ptr+pos], xp4
|
||||
XSTR [vec_i+pos], xp5
|
||||
XSTR [tmp+pos], xp6
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
;;; len + 16 is the original byte count; reaching it exactly means the
;;; whole length has been processed
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len: length is not a multiple of 16, redo the final 16 bytes
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
;;; slversion <func>, <core>, <ver>, <snum>
;;; Emits a 4-byte version record and exports two global labels for it:
;;; <func>_slver and <func>_slver_<core><ver><snum>. The record is the 16-bit
;;; word 0x<snum> followed by the bytes 0x<ver> and 0x<core>.
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_6vect_dot_prod_avx, 02, 03, 0195
|
334
erasure/src/gf-6vect-dot-prod-avx2.asm
Normal file
334
erasure/src/gf-6vect-dot-prod-avx2.asm
Normal file
@ -0,0 +1,334 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_6vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
;;; ELF64 build: bind the abstract names used by the routine (arg0..arg5,
;;; tmp..tmp6, return) to registers and define the prologue/epilogue macros.
;;; arg0..arg5 follow the System V AMD64 integer-argument order
;;; (rdi, rsi, rdx, rcx, r8, r9). r12-r15 are callee-saved, so the scratch
;;; names mapped onto them are pushed in FUNC_SAVE and popped in FUNC_RESTORE.
;;; tmp.w / tmp.b name the 32-bit and 8-bit views of tmp (r11).
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
;;; PS = pointer size in bytes, LOG_PS = log2(PS) (used as a shift count)
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
;;; On ELF the function entry is just a plain label
%define func(x) x:
|
||||
;;; Prologue: preserve the four callee-saved registers used as scratch
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
;;; Epilogue: pop in the reverse order of FUNC_SAVE
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
;;; Win64 build: bind the abstract names used by the routine to registers and
;;; define the prologue/epilogue macros.
;;; Only the first four arguments arrive in registers (rcx, rdx, r8, r9); the
;;; fifth is fetched from the caller's stack by FUNC_SAVE via arg(4).
;;; Frame layout created by FUNC_SAVE (offsets from rsp after alloc_stack):
;;;   0*16 .. 9*16            xmm6..xmm15
;;;   10*16 + 0*8 .. + 5*8    r12, r13, r14, r15, rdi, rsi
;;; stack_size reserves 7 eight-byte slots after the XMM area although only six
;;; are written.
;;; NOTE(review): proc_frame, alloc_stack, save_reg and end_prolog are not
;;; defined in this file; presumably they are the assembler's Win64
;;; unwind-info helper macros -- confirm against the assembler in use.
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define tmp5 rdi ; must be saved and restored
|
||||
%define tmp6 rsi ; must be saved and restored
|
||||
%define return rax
|
||||
;;; PS = pointer size in bytes, LOG_PS = log2(PS) (used as a shift count)
%define PS 8
|
||||
%define LOG_PS 3
|
||||
;;; 10*16 + 7*8 = 216 bytes. Presumably an odd multiple of 8 is required so
;;; that rsp is 16-byte aligned for the vmovdqa accesses below once the return
;;; address is accounted for -- confirm.
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
|
||||
;;; Address of stack argument x: skip this frame (stack_size) and the return
;;; address (PS), then index by PS*x
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
;;; Prologue: allocate the frame, save xmm6-xmm15 and six GPRs, then load the
;;; fifth argument from the stack into arg4 (r12).
;;; NOTE(review): the XMM registers are saved with plain vmovdqa stores rather
;;; than a save_xmm128-style helper, so presumably no unwind metadata is
;;; emitted for them -- confirm this is intended.
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
vmovdqa [rsp + 0*16], xmm6
|
||||
vmovdqa [rsp + 1*16], xmm7
|
||||
vmovdqa [rsp + 2*16], xmm8
|
||||
vmovdqa [rsp + 3*16], xmm9
|
||||
vmovdqa [rsp + 4*16], xmm10
|
||||
vmovdqa [rsp + 5*16], xmm11
|
||||
vmovdqa [rsp + 6*16], xmm12
|
||||
vmovdqa [rsp + 7*16], xmm13
|
||||
vmovdqa [rsp + 8*16], xmm14
|
||||
vmovdqa [rsp + 9*16], xmm15
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r14, 10*16 + 2*8
|
||||
save_reg r15, 10*16 + 3*8
|
||||
save_reg rdi, 10*16 + 4*8
|
||||
save_reg rsi, 10*16 + 5*8
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
;;; Epilogue: reload everything FUNC_SAVE stored (same offsets) and release
;;; the frame
%macro FUNC_RESTORE 0
|
||||
vmovdqa xmm6, [rsp + 0*16]
|
||||
vmovdqa xmm7, [rsp + 1*16]
|
||||
vmovdqa xmm8, [rsp + 2*16]
|
||||
vmovdqa xmm9, [rsp + 3*16]
|
||||
vmovdqa xmm10, [rsp + 4*16]
|
||||
vmovdqa xmm11, [rsp + 5*16]
|
||||
vmovdqa xmm12, [rsp + 6*16]
|
||||
vmovdqa xmm13, [rsp + 7*16]
|
||||
vmovdqa xmm14, [rsp + 8*16]
|
||||
vmovdqa xmm15, [rsp + 9*16]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r14, [rsp + 10*16 + 2*8]
|
||||
mov r15, [rsp + 10*16 + 3*8]
|
||||
mov rdi, [rsp + 10*16 + 4*8]
|
||||
mov rsi, [rsp + 10*16 + 5*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest arg4
|
||||
%define ptr arg5
|
||||
%define vec_i tmp2
|
||||
%define dest1 tmp3
|
||||
%define dest2 tmp4
|
||||
%define vskip1 tmp5
|
||||
%define vskip3 tmp6
|
||||
%define pos return
|
||||
|
||||
|
||||
;;; Select the instructions used for source loads (XLDR) and destination
;;; stores (XSTR):
;;;   EC_ALIGNED_ADDR undefined          -> vmovdqu   / vmovdqu  (unaligned)
;;;   EC_ALIGNED_ADDR + NO_NT_LDST       -> vmovdqa   / vmovdqa  (aligned)
;;;   EC_ALIGNED_ADDR without NO_NT_LDST -> vmovntdqa / vmovntdq (non-temporal)
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use unaligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; Addresses are aligned: use non-temporal load/store unless NO_NT_LDST is defined
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f ymm15
|
||||
%define xmask0fx xmm15
|
||||
%define xgft1_lo ymm14
|
||||
%define xgft1_hi ymm13
|
||||
%define xgft2_lo ymm12
|
||||
%define xgft2_hi ymm11
|
||||
%define xgft3_lo ymm10
|
||||
%define xgft3_hi ymm9
|
||||
%define x0 ymm0
|
||||
%define xtmpa ymm1
|
||||
%define xp1 ymm2
|
||||
%define xp2 ymm3
|
||||
%define xp3 ymm4
|
||||
%define xp4 ymm5
|
||||
%define xp5 ymm6
|
||||
%define xp6 ymm7
|
||||
|
||||
;;; gf_6vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; Produces six destination buffers in one pass, 32 bytes per iteration.
;;; Each 32-byte table (16-byte low-nibble table followed by 16-byte
;;; high-nibble table) is fetched with a single 32-byte load, so a table
;;; register holds lo|hi across its two 128-bit lanes. To match, the nibble
;;; index registers are lane-swapped:
;;;   xtmpa = low nibbles of bytes 0-15  | high nibbles of bytes 16-31
;;;   x0    = high nibbles of bytes 0-15 | low nibbles of bytes 16-31
;;; and a lane-swapped copy of each table (hi|lo) is shuffled with x0. XORing
;;; the two shuffles yields the complete product for all 32 bytes.
;;;
;;; Register aliases (defined earlier in this file):
;;;   len       - byte count; reduced by 32 on entry so it holds the offset of
;;;               the last full 32-byte chunk
;;;   vec       - number of sources; scaled by PS on entry so it can be
;;;               compared against the byte offset vec_i
;;;   mul_array - tables, 32 bytes per (destination, source) pair; the table
;;;               sets of consecutive destinations are vec*32 bytes apart
;;;               (vskip1 = vec*32, vskip3 = vec*96)
;;;   src       - array of source pointers
;;;   dest      - array of six destination pointers
;;;   pos       - current byte offset (shares rax with the return value)
;;;
;;; Returns 0 on success, 1 if len < 32.
;;; NOTE(review): the .next_vect loop is bottom-tested, so it runs once even
;;; when vec is 0; callers presumably always pass vec >= 1 -- confirm.
align 16
|
||||
global gf_6vect_dot_prod_avx2:function
|
||||
func(gf_6vect_dot_prod_avx2)
|
||||
FUNC_SAVE
|
||||
sub len, 32
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
;;; Build the 0x0f byte mask in a register: insert 0x0f into byte 0, then
;;; broadcast that byte to all 32 bytes
mov tmp.b, 0x0f
|
||||
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
|
||||
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||
mov vskip1, vec
|
||||
imul vskip1, 32
|
||||
mov vskip3, vec
|
||||
imul vskip3, 96
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
mov dest1, [dest]
|
||||
mov dest2, [dest+PS]
|
||||
|
||||
|
||||
;;; Outer loop: one 32-byte chunk of every destination per iteration
.loop32:
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
vpxor xp1, xp1
|
||||
vpxor xp2, xp2
|
||||
vpxor xp3, xp3
|
||||
vpxor xp4, xp4
|
||||
vpxor xp5, xp5
|
||||
vpxor xp6, xp6
|
||||
|
||||
;;; Inner loop: fold one source buffer into all six accumulators
.next_vect:
|
||||
mov ptr, [src+vec_i]
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
add vec_i, PS
|
||||
|
||||
vpand xgft3_lo, x0, xmask0f ;Mask low src nibble in bits 3-0 (xgft3_lo used as a temporary here)
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0 (also clears bits shifted in by the word shift)
|
||||
vperm2i128 xtmpa, xgft3_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
|
||||
vperm2i128 x0, xgft3_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
|
||||
|
||||
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
; and, in the upper lane of the same load, Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
; and, in the upper lane of the same load, Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
; and, in the upper lane of the same load, Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5 (source pointer already consumed, so ptr is free)
|
||||
|
||||
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
|
||||
;;; Second group: reuse the three table registers for destinations 4-6, then
;;; step tmp to the next source's 32-byte tables
vmovdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
; and, in the upper lane of the same load, Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
vmovdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
; and, in the upper lane of the same load, Ex{00}, Ex{10}, ..., Ex{f0}
|
||||
vmovdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
|
||||
; and, in the upper lane of the same load, Fx{00}, Fx{10}, ..., Fx{f0}
|
||||
add tmp, 32
|
||||
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
|
||||
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
|
||||
|
||||
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
vpxor xp4, xgft1_hi ;xp4 += partial
|
||||
|
||||
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
vpxor xp5, xgft2_hi ;xp5 += partial
|
||||
|
||||
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
vpxor xp6, xgft3_hi ;xp6 += partial
|
||||
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
|
||||
;;; tmp, ptr and vec_i are no longer needed for this chunk, so they are
;;; reused to hold destination pointers 3, 4 and 5; tmp is reloaded with
;;; destination pointer 6 once xp3 has been stored
mov tmp, [dest+2*PS]
|
||||
mov ptr, [dest+3*PS]
|
||||
mov vec_i, [dest+4*PS]
|
||||
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
XSTR [tmp+pos], xp3
|
||||
mov tmp, [dest+5*PS]
|
||||
XSTR [ptr+pos], xp4
|
||||
XSTR [vec_i+pos], xp5
|
||||
XSTR [tmp+pos], xp6
|
||||
|
||||
add pos, 32 ;Loop on 32 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop32
|
||||
|
||||
;;; len + 32 is the original byte count; reaching it exactly means the
;;; whole length has been processed
lea tmp, [len + 32]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len: length is not a multiple of 32, redo the final 32 bytes
|
||||
mov pos, len ;Overlapped offset length-32
|
||||
jmp .loop32 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
;;; slversion <func>, <core>, <ver>, <snum>
;;; Emits a 4-byte version record and exports two global labels for it:
;;; <func>_slver and <func>_slver_<core><ver><snum>. The record is the 16-bit
;;; word 0x<snum> followed by the bytes 0x<ver> and 0x<core>.
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_6vect_dot_prod_avx2, 04, 03, 019a
|
352
erasure/src/gf-6vect-dot-prod-sse-perf.c
Normal file
352
erasure/src/gf-6vect-dot-prod-sse-perf.c
Normal file
@ -0,0 +1,352 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/tests.h"
|
||||
|
||||
#ifndef FUNCTION_UNDER_TEST
|
||||
# define FUNCTION_UNDER_TEST gf_6vect_dot_prod_sse
|
||||
#endif
|
||||
|
||||
#define str(s) #s
|
||||
#define xstr(s) str(s)
|
||||
|
||||
/*
 * Benchmark configuration.
 *
 *   CACHED_TEST defined  - "_warm": many loops over a small data set.
 *   default              - "_cold": few loops over a data set sized from
 *                          GT_L3_CACHE.
 *   TEST_CUSTOM defined  - "_cus": TEST_SOURCES and TEST_LEN are not defined
 *                          here and must be supplied by the build;
 *                          TEST_LOOPS defaults to 1000.
 *
 * Every arithmetic macro is fully parenthesized so it expands safely inside
 * larger expressions (for example `x / TEST_LEN` or `x % GT_L3_CACHE`).
 */
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES 10
# define TEST_LEN     (8*1024)
# define TEST_LOOPS   40000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES 10
#  define GT_L3_CACHE  (32*1024*1024)	/* some number > last level cache */
#  define TEST_LEN     ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
#  define TEST_LOOPS   100
#  define TEST_TYPE_STR "_cold"
# else
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#    define TEST_LOOPS 1000
#  endif
# endif
#endif
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
/*
 * Print the first `len` bytes of `buf` to stdout in hex.
 *
 * Each byte is written as " %2x" (a space, then the value padded to two
 * columns).  A newline is emitted after every 32nd byte and once more after
 * the last byte.  Nothing but the final newline is printed when len <= 0.
 */
void dump(unsigned char *buf, int len)
{
	int i;

	for (i = 0; i < len;) {
		printf(" %2x", 0xff & buf[i++]);
		if (i % 32 == 0)	/* i already counts the byte just printed */
			printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Print a k-row by m-column byte matrix to stdout in hex.
 *
 * `s` is an array of k row pointers; the first m bytes of each row are
 * printed as " %2x" on one line.  A blank line follows the last row.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int i, j;

	for (i = 0; i < k; i++) {
		for (j = 0; j < m; j++) {
			printf(" %2x", s[i][j]);
		}
		printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j;
|
||||
void *buf;
|
||||
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
|
||||
u8 g4[TEST_SOURCES], g5[TEST_SOURCES], g6[TEST_SOURCES], *g_tbls;
|
||||
u8 *dest1, *dest2, *dest3, *dest4, *dest5, *dest6, *dest_ref1;
|
||||
u8 *dest_ref2, *dest_ref3, *dest_ref4, *dest_ref5, *dest_ref6;
|
||||
u8 *dest_ptrs[6], *buffs[TEST_SOURCES];
|
||||
struct perf start, stop;
|
||||
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 16, 6 * TEST_SOURCES * 32)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
g_tbls = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest2 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest3 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest4 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest5 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest6 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref2 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref3 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref4 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref5 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref6 = buf;
|
||||
|
||||
dest_ptrs[0] = dest1;
|
||||
dest_ptrs[1] = dest2;
|
||||
dest_ptrs[2] = dest3;
|
||||
dest_ptrs[3] = dest4;
|
||||
dest_ptrs[4] = dest5;
|
||||
dest_ptrs[5] = dest6;
|
||||
|
||||
// Performance test
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
memset(dest1, 0, TEST_LEN);
|
||||
memset(dest2, 0, TEST_LEN);
|
||||
memset(dest3, 0, TEST_LEN);
|
||||
memset(dest4, 0, TEST_LEN);
|
||||
memset(dest5, 0, TEST_LEN);
|
||||
memset(dest6, 0, TEST_LEN);
|
||||
memset(dest_ref1, 0, TEST_LEN);
|
||||
memset(dest_ref2, 0, TEST_LEN);
|
||||
memset(dest_ref3, 0, TEST_LEN);
|
||||
memset(dest_ref4, 0, TEST_LEN);
|
||||
memset(dest_ref5, 0, TEST_LEN);
|
||||
memset(dest_ref6, 0, TEST_LEN);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
g4[i] = rand();
|
||||
g5[i] = rand();
|
||||
g6[i] = rand();
|
||||
}
|
||||
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g6[j], &g_tbls[(160 * TEST_SOURCES) + (j * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||
dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
|
||||
dest_ref3);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs,
|
||||
dest_ref4);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs,
|
||||
dest_ref5);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], buffs,
|
||||
dest_ref6);
|
||||
|
||||
#ifdef DO_REF_PERF
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS / 20; i++) {
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g6[j], &g_tbls[(160 * TEST_SOURCES) + (j * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||
buffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||
buffs, dest_ref3);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
|
||||
buffs, dest_ref4);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES],
|
||||
buffs, dest_ref5);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES],
|
||||
buffs, dest_ref6);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf("gf_6vect_dot_prod_base" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 6) * i);
|
||||
#endif
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS; i++) {
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]);
|
||||
gf_vect_mul_init(g6[j], &g_tbls[(160 * TEST_SOURCES) + (j * 32)]);
|
||||
}
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 6) * i);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test3\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test4\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest4, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test5\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref5, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest5, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) {
|
||||
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test6\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref6, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest6, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("pass perf check\n");
|
||||
return 0;
|
||||
|
||||
}
|
911
erasure/src/gf-6vect-dot-prod-sse-test.c
Normal file
911
erasure/src/gf-6vect-dot-prod-sse-test.c
Normal file
@ -0,0 +1,911 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/types.h"
|
||||
|
||||
#ifndef FUNCTION_UNDER_TEST
|
||||
# define FUNCTION_UNDER_TEST gf_6vect_dot_prod_sse
|
||||
#endif
|
||||
#ifndef TEST_MIN_SIZE
|
||||
# define TEST_MIN_SIZE 16
|
||||
#endif
|
||||
|
||||
#define str(s) #s
|
||||
#define xstr(s) str(s)
|
||||
|
||||
#define TEST_LEN 8192
|
||||
#define TEST_SIZE (TEST_LEN/2)
|
||||
#define TEST_MEM TEST_SIZE
|
||||
#define TEST_LOOPS 20000
|
||||
#define TEST_TYPE_STR ""
|
||||
|
||||
#ifndef TEST_SOURCES
|
||||
# define TEST_SOURCES 16
|
||||
#endif
|
||||
#ifndef RANDOMS
|
||||
# define RANDOMS 20
|
||||
#endif
|
||||
|
||||
#ifdef EC_ALIGNED_ADDR
|
||||
// Define power of 2 range to check ptr, len alignment
|
||||
# define PTR_ALIGN_CHK_B 0
|
||||
# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
|
||||
#else
|
||||
// Define power of 2 range to check ptr, len alignment
|
||||
# define PTR_ALIGN_CHK_B 32
|
||||
# define LEN_ALIGN_CHK_B 32 // 0 for aligned only
|
||||
#endif
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
/*
 * Hex-dump the first `len` bytes of `buf` to stdout.
 *
 * Bytes are printed as " %2x"; the line is broken after every 32 bytes and a
 * closing newline is always written, even when len <= 0.
 */
void dump(unsigned char *buf, int len)
{
	int i;

	for (i = 0; i < len;) {
		printf(" %2x", 0xff & buf[i++]);
		if (i % 32 == 0)	/* i is the count of bytes printed so far */
			printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Hex-dump a matrix stored as an array of row pointers.
 *
 * Prints the first m bytes of each of the k rows in `s`, one row per line
 * using " %2x" per byte, then a blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int i, j;

	for (i = 0; i < k; i++) {
		for (j = 0; j < m; j++) {
			printf(" %2x", s[i][j]);
		}
		printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Hex-dump a k-row by m-column matrix stored contiguously in row-major
 * order: element (i, j) is read from s[j + i * m].
 *
 * Each row is printed on its own line using " %2x" per byte, followed by a
 * blank line after the last row.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int i, j;

	for (i = 0; i < k; i++) {
		for (j = 0; j < m; j++) {
			printf(" %2x", 0xff & s[j + (i * m)]);
		}
		printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, rtest, srcs;
|
||||
void *buf;
|
||||
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
|
||||
u8 g4[TEST_SOURCES], g5[TEST_SOURCES], g6[TEST_SOURCES], *g_tbls;
|
||||
u8 *dest1, *dest2, *dest3, *dest4, *dest5, *dest6, *dest_ref1;
|
||||
u8 *dest_ref2, *dest_ref3, *dest_ref4, *dest_ref5, *dest_ref6;
|
||||
u8 *dest_ptrs[6], *buffs[TEST_SOURCES];
|
||||
|
||||
int align, size;
|
||||
unsigned char *efence_buffs[TEST_SOURCES];
|
||||
unsigned int offset;
|
||||
u8 *ubuffs[TEST_SOURCES];
|
||||
u8 *udest_ptrs[6];
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 16, 2 * (6 * TEST_SOURCES * 32))) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
g_tbls = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest2 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest3 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest4 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest5 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest6 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref1 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref2 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref3 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref4 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref5 = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref6 = buf;
|
||||
|
||||
dest_ptrs[0] = dest1;
|
||||
dest_ptrs[1] = dest2;
|
||||
dest_ptrs[2] = dest3;
|
||||
dest_ptrs[3] = dest4;
|
||||
dest_ptrs[4] = dest5;
|
||||
dest_ptrs[5] = dest6;
|
||||
|
||||
// Test of all zeros
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
|
||||
memset(dest1, 0, TEST_LEN);
|
||||
memset(dest2, 0, TEST_LEN);
|
||||
memset(dest3, 0, TEST_LEN);
|
||||
memset(dest4, 0, TEST_LEN);
|
||||
memset(dest5, 0, TEST_LEN);
|
||||
memset(dest6, 0, TEST_LEN);
|
||||
memset(dest_ref1, 0, TEST_LEN);
|
||||
memset(dest_ref2, 0, TEST_LEN);
|
||||
memset(dest_ref3, 0, TEST_LEN);
|
||||
memset(dest_ref4, 0, TEST_LEN);
|
||||
memset(dest_ref5, 0, TEST_LEN);
|
||||
memset(dest_ref6, 0, TEST_LEN);
|
||||
memset(g1, 2, TEST_SOURCES);
|
||||
memset(g2, 1, TEST_SOURCES);
|
||||
memset(g3, 7, TEST_SOURCES);
|
||||
memset(g4, 9, TEST_SOURCES);
|
||||
memset(g5, 4, TEST_SOURCES);
|
||||
memset(g6, 0xe6, TEST_SOURCES);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]);
|
||||
gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]);
|
||||
gf_vect_mul_init(g5[i], &g_tbls[128 * TEST_SOURCES + i * 32]);
|
||||
gf_vect_mul_init(g6[i], &g_tbls[160 * TEST_SOURCES + i * 32]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||
dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
|
||||
dest_ref3);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs,
|
||||
dest_ref4);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs,
|
||||
dest_ref5);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], buffs,
|
||||
dest_ref6);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest4, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test5\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref5, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest5, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test6\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref6, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest6, 25);
|
||||
return -1;
|
||||
}
|
||||
putchar('.');
|
||||
|
||||
// Rand data test
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
g4[i] = rand();
|
||||
g5[i] = rand();
|
||||
g6[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g6[i], &g_tbls[(160 * TEST_SOURCES) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||
buffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||
buffs, dest_ref3);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
|
||||
buffs, dest_ref4);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES],
|
||||
buffs, dest_ref5);
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES],
|
||||
buffs, dest_ref6);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest4, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref5, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest5, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test6 %d\n", rtest);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref6, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest6, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Rand data test with varied parameters
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
g4[i] = rand();
|
||||
g5[i] = rand();
|
||||
g6[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs,
|
||||
dest_ref2);
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs,
|
||||
dest_ref3);
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs,
|
||||
dest_ref4);
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[128 * srcs], buffs,
|
||||
dest_ref5);
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[160 * srcs], buffs,
|
||||
dest_ref6);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test1 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test2 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test3 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test4 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest4, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test5 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref5, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest5, 25);
|
||||
return -1;
|
||||
}
|
||||
if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||
" test6 srcs=%d\n", srcs);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref6, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest6, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
g4[i] = rand();
|
||||
g5[i] = rand();
|
||||
g6[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]);
|
||||
gf_vect_mul_init(g6[i], &g_tbls[(160 * TEST_SOURCES) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||
efence_buffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||
efence_buffs, dest_ref3);
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
|
||||
efence_buffs, dest_ref4);
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES],
|
||||
efence_buffs, dest_ref5);
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES],
|
||||
efence_buffs, dest_ref6);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs);
|
||||
|
||||
if (0 != memcmp(dest_ref1, dest1, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest1, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 != memcmp(dest_ref2, dest2, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest2, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 != memcmp(dest_ref3, dest3, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest3, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 != memcmp(dest_ref4, dest4, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest4, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 != memcmp(dest_ref5, dest5, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref5, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest5, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 != memcmp(dest_ref6, dest6, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test6 %d\n", rtest);
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref6, align);
|
||||
printf("dprod_dut:");
|
||||
dump(dest6, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test rand ptr alignment if available
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||
srcs = rand() % TEST_SOURCES;
|
||||
if (srcs == 0)
|
||||
continue;
|
||||
|
||||
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||
// Add random offsets
|
||||
for (i = 0; i < srcs; i++)
|
||||
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
udest_ptrs[4] = dest5 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
udest_ptrs[5] = dest6 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
memset(dest1, 0, TEST_LEN); // zero pad to check write-over
|
||||
memset(dest2, 0, TEST_LEN);
|
||||
memset(dest3, 0, TEST_LEN);
|
||||
memset(dest4, 0, TEST_LEN);
|
||||
memset(dest5, 0, TEST_LEN);
|
||||
memset(dest6, 0, TEST_LEN);
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
ubuffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
g4[i] = rand();
|
||||
g5[i] = rand();
|
||||
g6[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], ubuffs, dest_ref5);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[160 * srcs], ubuffs, dest_ref6);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs);
|
||||
|
||||
if (memcmp(dest_ref1, udest_ptrs[0], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[0], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref2, udest_ptrs[1], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[1], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref3, udest_ptrs[2], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[2], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref4, udest_ptrs[3], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[3], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref5, udest_ptrs[4], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref5, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[4], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref6, udest_ptrs[5], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref6, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(udest_ptrs[5], 25);
|
||||
return -1;
|
||||
}
|
||||
// Confirm that padding around dests is unchanged
|
||||
memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||
offset = udest_ptrs[0] - dest1;
|
||||
|
||||
if (memcmp(dest1, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad1 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad1 end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
offset = udest_ptrs[1] - dest2;
|
||||
if (memcmp(dest2, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad2 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad2 end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
offset = udest_ptrs[2] - dest3;
|
||||
if (memcmp(dest3, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad3 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad3 end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
offset = udest_ptrs[3] - dest4;
|
||||
if (memcmp(dest4, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad4 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad4 end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
offset = udest_ptrs[4] - dest5;
|
||||
if (memcmp(dest5, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad5 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest5 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad5 end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
offset = udest_ptrs[5] - dest6;
|
||||
if (memcmp(dest6, dest_ref1, offset)) {
|
||||
printf("Fail rand ualign pad6 start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest6 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad6 end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test all size alignment
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
|
||||
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||
srcs = TEST_SOURCES;
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
g1[i] = rand();
|
||||
g2[i] = rand();
|
||||
g3[i] = rand();
|
||||
g4[i] = rand();
|
||||
g5[i] = rand();
|
||||
g6[i] = rand();
|
||||
}
|
||||
|
||||
for (i = 0; i < srcs; i++) {
|
||||
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]);
|
||||
gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]);
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], buffs, dest_ref5);
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[160 * srcs], buffs, dest_ref6);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs);
|
||||
|
||||
if (memcmp(dest_ref1, dest_ptrs[0], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref1, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[0], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref2, dest_ptrs[1], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref2, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[1], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref3, dest_ptrs[2], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref3, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[2], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref4, dest_ptrs[3], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref4, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[3], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref5, dest_ptrs[4], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref5, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[4], 25);
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest_ref6, dest_ptrs[5], size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref6, 25);
|
||||
printf("dprod_dut:");
|
||||
dump(dest_ptrs[5], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
printf("Pass\n");
|
||||
return 0;
|
||||
|
||||
}
|
323
erasure/src/gf-6vect-dot-prod-sse.asm
Normal file
323
erasure/src/gf-6vect-dot-prod-sse.asm
Normal file
@ -0,0 +1,323 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_6vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
;;; ELF64 build: argument and scratch register assignments.
;;; arg0-arg4 carry len, vec, g_tbls, buffs and dests (see the aliases
;;; defined after the ABI blocks); arg5 is only used as a scratch pointer.
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
;; PS is the size of one pointer in bytes, LOG_PS its base-2 logarithm
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
;; func(x) expands to a plain label in this output format
%define func(x) x:
|
||||
;; Preserve r12-r15, the registers backing tmp3-tmp6 above
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
;; Pop in the reverse order of the pushes in FUNC_SAVE
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
;;; win64 build: argument and scratch register assignments.
;;; arg0-arg3 are taken from rcx, rdx, r8, r9; arg4 (dests) is loaded from
;;; the stack slot arg(4) at the end of FUNC_SAVE.  arg5 is only used as a
;;; scratch pointer.
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved, loaded and restored
|
||||
%define arg5 r15 ; must be saved and restored
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r14 ; must be saved and restored
|
||||
%define tmp5 rdi ; must be saved and restored
|
||||
%define tmp6 rsi ; must be saved and restored
|
||||
%define return rax
|
||||
;; PS is the size of one pointer in bytes, LOG_PS its base-2 logarithm
%define PS 8
|
||||
%define LOG_PS 3
|
||||
;; Frame layout: ten 16-byte slots for xmm6-xmm15 followed by seven 8-byte
;; slots, of which FUNC_SAVE fills six (r12-r15, rdi, rsi).
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
|
||||
;; arg(x): stack slot of argument x as addressed after alloc_stack.  The
;; extra PS presumably skips the return address -- confirm against the
;; win64 calling convention.
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||
|
||||
;; proc_frame, alloc_stack, save_xmm128, save_reg and end_prolog are not
;; defined in this file; presumably they come from the assembler's win64
;; support -- TODO confirm.
%define func(x) proc_frame x
|
||||
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm8, 2*16
|
||||
save_xmm128 xmm9, 3*16
|
||||
save_xmm128 xmm10, 4*16
|
||||
save_xmm128 xmm11, 5*16
|
||||
save_xmm128 xmm12, 6*16
|
||||
save_xmm128 xmm13, 7*16
|
||||
save_xmm128 xmm14, 8*16
|
||||
save_xmm128 xmm15, 9*16
|
||||
save_reg r12, 10*16 + 0*8
|
||||
save_reg r13, 10*16 + 1*8
|
||||
save_reg r14, 10*16 + 2*8
|
||||
save_reg r15, 10*16 + 3*8
|
||||
save_reg rdi, 10*16 + 4*8
|
||||
save_reg rsi, 10*16 + 5*8
|
||||
end_prolog
|
||||
;; Fifth argument (dests) is fetched from the stack into its register
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
;; Reload every register stored by FUNC_SAVE from the same offsets, then
;; release the frame.
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp + 0*16]
|
||||
movdqa xmm7, [rsp + 1*16]
|
||||
movdqa xmm8, [rsp + 2*16]
|
||||
movdqa xmm9, [rsp + 3*16]
|
||||
movdqa xmm10, [rsp + 4*16]
|
||||
movdqa xmm11, [rsp + 5*16]
|
||||
movdqa xmm12, [rsp + 6*16]
|
||||
movdqa xmm13, [rsp + 7*16]
|
||||
movdqa xmm14, [rsp + 8*16]
|
||||
movdqa xmm15, [rsp + 9*16]
|
||||
mov r12, [rsp + 10*16 + 0*8]
|
||||
mov r13, [rsp + 10*16 + 1*8]
|
||||
mov r14, [rsp + 10*16 + 2*8]
|
||||
mov r15, [rsp + 10*16 + 3*8]
|
||||
mov rdi, [rsp + 10*16 + 4*8]
|
||||
mov rsi, [rsp + 10*16 + 5*8]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
;;; Symbolic names used by the function body.
;; len: byte count; the body subtracts 16 from it on entry
%define len arg0
|
||||
;; vec: number of source vectors; scaled by PS in the body
%define vec arg1
|
||||
;; mul_array: base of the multiplication tables (g_tbls)
%define mul_array arg2
|
||||
;; src: array of source buffer pointers (buffs)
%define src arg3
|
||||
;; dest: array of six destination buffer pointers (dests)
%define dest arg4
|
||||
;; ptr: scratch pointer, not an incoming argument
%define ptr arg5
|
||||
;; vec_i: byte offset into the src pointer array
%define vec_i tmp2
|
||||
;; dest1/dest2: first two destination pointers, loaded once before the loop
%define dest1 tmp3
|
||||
%define dest2 tmp4
|
||||
;; vskip1 = vec*32 and vskip3 = vec*96, computed in the function prologue
%define vskip1 tmp5
|
||||
%define vskip3 tmp6
|
||||
;; pos: current byte offset into the buffers; shares rax with the return
;; value, which is only written after the loop has finished
%define pos return
|
||||
|
||||
|
||||
;;; Select the instructions used to load source data (XLDR) and store
;;; results (XSTR).
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use unaligned load/store
|
||||
%define XLDR movdqu
|
||||
%define XSTR movdqu
|
||||
%else
|
||||
;;; Use non-temporal load/store, unless NO_NT_LDST is defined
|
||||
%ifdef NO_NT_LDST
|
||||
;; NO_NT_LDST: use the ordinary aligned moves instead
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
;;; Assembler mode, code section and xmm register aliases.
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
;; xmask0f is loaded from mask0f in the function prologue
%define xmask0f xmm15
|
||||
;; Three lo/hi table register pairs.  The body loads them twice per source
;; vector: first for outputs 1-3, then again for outputs 4-6.
%define xgft1_lo xmm14
|
||||
%define xgft1_hi xmm13
|
||||
%define xgft2_lo xmm12
|
||||
%define xgft2_hi xmm11
|
||||
%define xgft3_lo xmm10
|
||||
%define xgft3_hi xmm9
|
||||
;; x0 holds the current 16 source bytes (later their high nibbles),
;; xtmpa their low nibbles
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
;; xp1-xp6 accumulate the six output blocks
%define xp1 xmm2
|
||||
%define xp2 xmm3
|
||||
%define xp3 xmm4
|
||||
%define xp4 xmm5
|
||||
%define xp5 xmm6
|
||||
%define xp6 xmm7
|
||||
|
||||
;;; gf_6vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; For every 16-byte block at offset pos, XORs together one table-lookup
;;; product per source vector and stores the six accumulated results to
;;; dests[0..5] at the same offset.
;;;
;;; Table layout as addressed below: each (output, source) pair owns 32
;;; bytes (low-nibble table, then high-nibble table); the tables of output
;;; k start at mul_array + k*vec*32, with consecutive sources 32 bytes apart.
;;;
;;; Returns 0 on success and 1 if len is less than 16.  When len is not a
;;; multiple of 16, the final 16 bytes are computed by one extra pass at
;;; offset len-16 that overlaps bytes already written.
align 16
|
||||
global gf_6vect_dot_prod_sse:function
|
||||
func(gf_6vect_dot_prod_sse)
|
||||
FUNC_SAVE
|
||||
;; From here on len holds (length - 16): the offset of the last full block
sub len, 16
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
mov vskip1, vec
|
||||
imul vskip1, 32
|
||||
mov vskip3, vec
|
||||
imul vskip3, 96
|
||||
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||
mov dest1, [dest]
|
||||
mov dest2, [dest+PS]
|
||||
|
||||
|
||||
;; Outer loop: one 16-byte block of every destination per iteration
.loop16:
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
pxor xp1, xp1
|
||||
pxor xp2, xp2
|
||||
pxor xp3, xp3
|
||||
pxor xp4, xp4
|
||||
pxor xp5, xp5
|
||||
pxor xp6, xp6
|
||||
|
||||
;; Inner loop: accumulate the contribution of one source vector
.next_vect:
|
||||
mov ptr, [src+vec_i]
|
||||
add vec_i, PS
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
|
||||
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||
movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||
movdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||
movdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||
movdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
movdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
;; ptr is free once the source block is loaded; reuse it for 5*vskip1
lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5
|
||||
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
;; psraw shifts 16-bit lanes; the pand that follows clears whatever was
;; shifted in above bit 3 of each byte
psraw x0, 4 ;Shift to put high nibble into bits 3-0
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 3-0
|
||||
|
||||
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
pxor xp1, xgft1_hi ;xp1 += partial
|
||||
|
||||
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
pxor xp2, xgft2_hi ;xp2 += partial
|
||||
|
||||
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
pxor xp3, xgft3_hi ;xp3 += partial
|
||||
|
||||
|
||||
;; Second half: the same three register pairs now serve outputs 4-6
movdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||
movdqu xgft1_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||
movdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||
movdqu xgft2_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||
movdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
|
||||
movdqu xgft3_hi, [tmp+ptr+16] ; " Fx{00}, Fx{10}, ..., Fx{f0}
|
||||
;; Advance to the 32-byte table pair of the next source vector
add tmp, 32
|
||||
|
||||
|
||||
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||
pxor xp4, xgft1_hi ;xp4 += partial
|
||||
|
||||
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||
pxor xp5, xgft2_hi ;xp5 += partial
|
||||
|
||||
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||
pxor xp6, xgft3_hi ;xp6 += partial
|
||||
|
||||
;; vec was scaled by PS above, so it is the byte size of the src array
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
|
||||
;; tmp, ptr and vec_i are dead after the inner loop (tmp and vec_i are
;; re-initialised at .loop16), so they carry destination pointers 3-5 here
mov tmp, [dest+2*PS]
|
||||
mov ptr, [dest+3*PS]
|
||||
mov vec_i, [dest+4*PS]
|
||||
|
||||
XSTR [dest1+pos], xp1
|
||||
XSTR [dest2+pos], xp2
|
||||
XSTR [tmp+pos], xp3
|
||||
;; tmp is reused once more, for the sixth destination pointer
mov tmp, [dest+5*PS]
|
||||
XSTR [ptr+pos], xp4
|
||||
XSTR [vec_i+pos], xp5
|
||||
XSTR [tmp+pos], xp6
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
;; len + 16 is the original length: equal to pos means no tail remains
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
FUNC_RESTORE
|
||||
mov return, 0
|
||||
ret
|
||||
|
||||
;; Reached when the length passed in is less than 16
.return_fail:
|
||||
FUNC_RESTORE
|
||||
mov return, 1
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
;;; Constant data and the version stamp for this routine.
section .data
|
||||
|
||||
align 16
|
||||
;; 16 bytes of 0x0f: selects the low nibble of every byte
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
;; slversion name, a, b, c: exports the labels name_slver and
;; name_slver_abc at the same address, followed by the word 0x<c> and the
;; bytes 0x<b>, 0x<a>.
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_6vect_dot_prod_sse, 00, 03, 0066
|
225
erasure/src/gf-inverse-test.c
Normal file
225
erasure/src/gf-inverse-test.c
Normal file
@ -0,0 +1,225 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include <assert.h>
|
||||
|
||||
#include "erasure-code.h"
|
||||
|
||||
// Test configuration.  TEST_LEN is not referenced by any code in this file.
#define TEST_LEN 8192
|
||||
|
||||
// TEST_SOURCES and RANDOMS may be overridden from the compiler command line.
#ifndef TEST_SOURCES
|
||||
# define TEST_SOURCES 128
|
||||
#endif
|
||||
// RANDOMS: number of iterations of the random-size phase in main()
#ifndef RANDOMS
|
||||
# define RANDOMS 200
|
||||
#endif
|
||||
|
||||
// KMAX: largest matrix dimension tested; the work buffers hold KMAX*KMAX bytes
#define KMAX TEST_SOURCES
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
/*
 * Multiply two n x n byte matrices: c = a * b.
 *
 * All three matrices are stored row-major.  Element products are formed
 * with gf_mul() and summed with XOR.
 */
void matrix_mult(u8 * a, u8 * b, u8 * c, int n)
{
	int row, col, idx;

	for (row = 0; row < n; row++) {
		for (col = 0; col < n; col++) {
			u8 acc = 0;

			// Dot product of row 'row' of a with column 'col' of b
			for (idx = 0; idx < n; idx++)
				acc ^= gf_mul(a[n * row + idx], b[n * idx + col]);

			c[row * n + col] = acc;
		}
	}
}
|
||||
|
||||
/*
 * Print an n x n row-major byte matrix to stdout, one row per line with
 * each element in hexadecimal, followed by one empty line.
 */
void print_matrix(u8 * a, int n)
{
	const u8 *cell = a;	// walks the matrix in storage order
	int row, col;

	for (row = 0; row < n; row++) {
		for (col = 0; col < n; col++)
			printf(" %2x", *cell++);
		printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Check whether the n x n row-major byte matrix a is the identity matrix.
 *
 * Returns 0 if every diagonal element is 1 and every other element is 0,
 * and -1 at the first element that differs.
 */
int is_ident(u8 * a, const int n)
{
	int row, col;

	for (row = 0; row < n; row++) {
		for (col = 0; col < n; col++) {
			const u8 expected = (row == col) ? 1 : 0;

			if (a[row * n + col] != expected)
				return -1;
		}
	}
	return 0;
}
|
||||
|
||||
/*
 * Invert the n x n matrix 'in' and verify the result.
 *
 * 'in' is first copied to 'sav'; gf_invert_matrix() then writes the
 * inverse to 'inv'.  The product inv * sav is written back into 'in' and
 * must be the identity matrix.
 *
 * Returns 0 (after printing a '.') on success, or -1 after printing
 * diagnostics if the inversion is refused or the product is not the
 * identity.
 */
int inv_test(u8 * in, u8 * inv, u8 * sav, int n)
{
	int rc = 0;

	memcpy(sav, in, n * n);

	if (gf_invert_matrix(in, inv, n) != 0) {
		printf("Given singular matrix\n");
		print_matrix(sav, n);
		rc = -1;
	} else {
		matrix_mult(inv, sav, in, n);

		if (is_ident(in, n) != 0) {
			printf("fail\n");
			print_matrix(sav, n);
			print_matrix(inv, n);
			print_matrix(in, n);
			rc = -1;
		} else {
			putchar('.');
		}
	}

	return rc;
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, k, t;
|
||||
u8 *test_mat, *save_mat, *invr_mat;
|
||||
|
||||
u8 test1[] = { 1, 1, 6,
|
||||
1, 1, 1,
|
||||
7, 1, 9
|
||||
};
|
||||
|
||||
u8 test2[] = { 0, 1, 6,
|
||||
1, 0, 1,
|
||||
0, 1, 9
|
||||
};
|
||||
|
||||
u8 test3[] = { 0, 0, 1,
|
||||
1, 0, 0,
|
||||
0, 1, 1
|
||||
};
|
||||
|
||||
u8 test4[] = { 0, 1, 6, 7,
|
||||
1, 1, 0, 0,
|
||||
0, 1, 2, 3,
|
||||
3, 2, 2, 3
|
||||
}; // = row3+3*row2
|
||||
|
||||
printf("gf_inverse_test: max=%d ", KMAX);
|
||||
|
||||
test_mat = malloc(KMAX * KMAX);
|
||||
save_mat = malloc(KMAX * KMAX);
|
||||
invr_mat = malloc(KMAX * KMAX);
|
||||
|
||||
if (NULL == test_mat || NULL == save_mat || NULL == invr_mat)
|
||||
return -1;
|
||||
|
||||
// Test with lots of leading 1's
|
||||
k = 3;
|
||||
memcpy(test_mat, test1, k * k);
|
||||
if (inv_test(test_mat, invr_mat, save_mat, k))
|
||||
return -1;
|
||||
|
||||
// Test with leading zeros
|
||||
k = 3;
|
||||
memcpy(test_mat, test2, k * k);
|
||||
if (inv_test(test_mat, invr_mat, save_mat, k))
|
||||
return -1;
|
||||
|
||||
// Test 3
|
||||
k = 3;
|
||||
memcpy(test_mat, test3, k * k);
|
||||
if (inv_test(test_mat, invr_mat, save_mat, k))
|
||||
return -1;
|
||||
|
||||
// Test 4 - try a singular matrix
|
||||
k = 4;
|
||||
memcpy(test_mat, test4, k * k);
|
||||
if (!gf_invert_matrix(test_mat, invr_mat, k)) {
|
||||
printf("Fail: didn't catch singular matrix\n");
|
||||
print_matrix(test4, 4);
|
||||
return -1;
|
||||
}
|
||||
// Do random test of size KMAX
|
||||
k = KMAX;
|
||||
|
||||
for (i = 0; i < k * k; i++)
|
||||
test_mat[i] = save_mat[i] = rand();
|
||||
|
||||
if (gf_invert_matrix(test_mat, invr_mat, k)) {
|
||||
printf("rand picked a singular matrix, try again\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
matrix_mult(invr_mat, save_mat, test_mat, k);
|
||||
|
||||
if (is_ident(test_mat, k)) {
|
||||
printf("fail\n");
|
||||
print_matrix(save_mat, k);
|
||||
print_matrix(invr_mat, k);
|
||||
print_matrix(test_mat, k);
|
||||
return -1;
|
||||
}
|
||||
// Do Randoms. Random size and coefficients
|
||||
for (t = 0; t < RANDOMS; t++) {
|
||||
k = rand() % KMAX;
|
||||
|
||||
for (i = 0; i < k * k; i++)
|
||||
test_mat[i] = save_mat[i] = rand();
|
||||
|
||||
if (gf_invert_matrix(test_mat, invr_mat, k))
|
||||
continue;
|
||||
|
||||
matrix_mult(invr_mat, save_mat, test_mat, k);
|
||||
|
||||
if (is_ident(test_mat, k)) {
|
||||
printf("fail rand k=%d\n", k);
|
||||
print_matrix(save_mat, k);
|
||||
print_matrix(invr_mat, k);
|
||||
print_matrix(test_mat, k);
|
||||
return -1;
|
||||
}
|
||||
if (0 == (t % 8))
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
printf(" Pass\n");
|
||||
return 0;
|
||||
}
|
166
erasure/src/gf-vect-dot-prod-1tbl.c
Normal file
166
erasure/src/gf-vect-dot-prod-1tbl.c
Normal file
@ -0,0 +1,166 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure/tests.h"
|
||||
#include "erasure-code.h"
|
||||
|
||||
// Benchmark configuration.
//
// Three modes are selectable at compile time:
//   CACHED_TEST  - small data set looped many times ("_warm")
//   (default)    - data set larger than the last level cache ("_cold")
//   TEST_CUSTOM  - the builder supplies TEST_SOURCES / TEST_LEN ("_cus")
//
// Every size macro is fully parenthesized so that it expands to a single
// operand no matter which operator sits next to it at the point of use
// (e.g. `x / TEST_LEN` or `x % TEST_LEN`).

//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES  10
# define TEST_LEN      (8 * 1024)
# define TEST_LOOPS    4000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES  10
#  define GT_L3_CACHE   (32 * 1024 * 1024)	/* some number > last level cache */
#  define TEST_LEN      (GT_L3_CACHE / TEST_SOURCES)
#  define TEST_LOOPS    10
#  define TEST_TYPE_STR "_cold"
# else
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#   define TEST_LOOPS 1000
#  endif
# endif
#endif
|
||||
|
||||
typedef unsigned char u8;

// Global GF(256) tables
u8 gff[256];			// gff[i]   : value reached after doubling 1 i times
u8 gflog[256];			// gflog[v] : last step index at which v was produced
u8 gf_mul_table[256 * 256];	// filled by mk_gf_mul_table()

/*
 * Fill the gff[] and gflog[] tables.
 *
 * Starting from 1, the running element is doubled 256 times.  Whenever the
 * top bit is set before the shift, the shifted value is reduced by XOR-ing
 * with 0x1d; the result is truncated to 8 bits by the u8 assignment.
 * gflog[0] is explicitly set to 0 because the running element never
 * becomes 0.
 */
void mk_gf_field()
{
	unsigned int step;
	u8 elem = 1;

	gflog[0] = 0;

	for (step = 0; step < 256; step++) {
		gff[step] = elem;
		gflog[elem] = step;

		// mult by GF{2}
		if (elem & 0x80)
			elem = (elem << 1) ^ 0x1d;
		else
			elem = elem << 1;
	}
}
|
||||
|
||||
/*
 * Fill `table` (256 * 256 bytes) with every product gf_mul(a, b), stored at
 * index a * 256 + b.  This trades 64 KiB of memory for a multiply that is a
 * single table lookup.
 */
void mk_gf_mul_table(u8 * table)
{
	int idx;

	// idx walks the table linearly; the high byte of idx is the first
	// operand and the low byte is the second.
	for (idx = 0; idx < 256 * 256; idx++)
		table[idx] = gf_mul(idx / 256, idx % 256);
}
|
||||
|
||||
/*
 * Reference dot product built on gf_mul().
 *
 * For every byte position p in [0, len):
 *     dest[p] = XOR over r in [0, vlen) of gf_mul(src[r][p], v[r])
 *
 * len  - number of bytes in each source buffer and in dest
 * vlen - number of source buffers / coefficients
 * v    - vlen coefficients
 * src  - vlen pointers to source buffers of len bytes
 * dest - output buffer of len bytes
 */
void gf_vect_dot_prod_ref(int len, int vlen, u8 * v, u8 ** src, u8 * dest)
{
	int pos;

	for (pos = 0; pos < len; pos++) {
		u8 acc = 0;
		int row = 0;

		while (row < vlen) {
			acc ^= gf_mul(src[row][pos], v[row]);
			row++;
		}

		dest[pos] = acc;
	}
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int i, j, k;
|
||||
u8 s, vec[TEST_SOURCES], dest1[TEST_LEN], dest2[TEST_LEN];
|
||||
u8 *matrix[TEST_SOURCES];
|
||||
struct perf start, stop;
|
||||
|
||||
mk_gf_field();
|
||||
mk_gf_mul_table(gf_mul_table);
|
||||
|
||||
//generate random vector and matrix/data
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
vec[i] = rand();
|
||||
|
||||
if (!(matrix[i] = malloc(TEST_LEN))) {
|
||||
fprintf(stderr, "Error failure\n\n");
|
||||
return -1;
|
||||
}
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
matrix[i][j] = rand();
|
||||
|
||||
}
|
||||
|
||||
gf_vect_dot_prod_ref(TEST_LEN, TEST_SOURCES, vec, matrix, dest1);
|
||||
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS; i++)
|
||||
gf_vect_dot_prod_ref(TEST_LEN, TEST_SOURCES, vec, matrix, dest1);
|
||||
|
||||
perf_stop(&stop);
|
||||
printf("gf_vect_dot_prod_2tbl" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i);
|
||||
|
||||
// Warm up mult tables
|
||||
for (i = 0; i < TEST_LEN; i++) {
|
||||
s = 0;
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
s ^= gf_mul_table[vec[j] * 256 + matrix[j][i]];
|
||||
}
|
||||
dest2[i] = s;
|
||||
}
|
||||
|
||||
perf_start(&start);
|
||||
for (k = 0; k < TEST_LOOPS; k++) {
|
||||
for (i = 0; i < TEST_LEN; i++) {
|
||||
s = 0;
|
||||
for (j = 0; j < TEST_SOURCES; j++) {
|
||||
s ^= gf_mul_table[vec[j] * 256 + matrix[j][i]];
|
||||
}
|
||||
dest2[i] = s;
|
||||
}
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf("gf_vect_dot_prod_1tbl" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * k);
|
||||
|
||||
// Compare with reference function
|
||||
if (0 != memcmp(dest1, dest2, TEST_LEN)) {
|
||||
printf("Error, different results!\n\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("Pass functional test\n");
|
||||
return 0;
|
||||
}
|
184
erasure/src/gf-vect-dot-prod-avx-perf.c
Normal file
184
erasure/src/gf-vect-dot-prod-avx-perf.c
Normal file
@ -0,0 +1,184 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/tests.h"
|
||||
|
||||
// Function being benchmarked; may be overridden on the compiler command line.
#ifndef FUNCTION_UNDER_TEST
# define FUNCTION_UNDER_TEST gf_vect_dot_prod_avx
#endif

// Two-level stringification so that xstr(MACRO) yields the expansion of
// MACRO as a string literal rather than the macro's own name.
#define str(s) #s
#define xstr(s) str(s)

// Benchmark configuration.  Size macros are fully parenthesized so they
// expand to a single operand wherever they are used.

//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES  10
# define TEST_LEN      (8 * 1024)
# define TEST_LOOPS    40000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES  10
#  define GT_L3_CACHE   (32 * 1024 * 1024)	/* some number > last level cache */
// Per-source length, rounded down to a multiple of 64 bytes
#  define TEST_LEN      ((GT_L3_CACHE / TEST_SOURCES) & ~(64 - 1))
#  define TEST_LOOPS    100
#  define TEST_TYPE_STR "_cold"
# else
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#   define TEST_LOOPS 1000
#  endif
# endif
#endif

typedef unsigned char u8;
|
||||
|
||||
/*
 * Print the first `len` bytes of `buf` to stdout in hex, 32 values per
 * line, followed by a final newline.
 */
void dump(unsigned char *buf, int len)
{
	int count;

	// `count` is the number of bytes printed so far, including this one
	for (count = 1; count <= len; count++) {
		printf(" %2x", 0xff & buf[count - 1]);
		if ((count & 31) == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Print the first `m` bytes of each of the first `k` buffers in `s` to
 * stdout in hex, one buffer per line, followed by a blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row;

	for (row = 0; row < k; row++) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j;
|
||||
void *buf;
|
||||
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref;
|
||||
u8 *temp_buff, *buffs[TEST_SOURCES];
|
||||
struct perf start, stop;
|
||||
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
temp_buff = buf;
|
||||
|
||||
// Performance test
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
memset(dest, 0, TEST_LEN);
|
||||
memset(temp_buff, 0, TEST_LEN);
|
||||
memset(dest_ref, 0, TEST_LEN);
|
||||
memset(g, 0, TEST_SOURCES);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (j = 0; j < TEST_SOURCES; j++)
|
||||
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||
|
||||
#ifdef DO_REF_PERF
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS; i++) {
|
||||
for (j = 0; j < TEST_SOURCES; j++)
|
||||
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i);
|
||||
#endif
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS; i++) {
|
||||
for (j = 0; j < TEST_SOURCES; j++)
|
||||
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("pass perf check\n");
|
||||
return 0;
|
||||
}
|
525
erasure/src/gf-vect-dot-prod-avx-test.c
Normal file
525
erasure/src/gf-vect-dot-prod-avx-test.c
Normal file
@ -0,0 +1,525 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/types.h"
|
||||
|
||||
// Function being tested and smallest length exercised; both may be
// overridden on the compiler command line.
#if !defined(FUNCTION_UNDER_TEST)
# define FUNCTION_UNDER_TEST gf_vect_dot_prod_avx
#endif
#if !defined(TEST_MIN_SIZE)
# define TEST_MIN_SIZE 16
#endif

// Two-level stringification: xstr(MACRO) gives MACRO's expansion as a string
#define str(x) #x
#define xstr(x) str(x)

// Size of every allocated buffer, and the largest length used by the
// end-of-buffer test
#define TEST_LEN 8192
#define TEST_SIZE (TEST_LEN / 2)

#if !defined(TEST_SOURCES)
# define TEST_SOURCES 16
#endif
#if !defined(RANDOMS)
# define RANDOMS 20
#endif

// Upper bounds on the m and k dimensions used in the erasure-code tests
#define MMAX TEST_SOURCES
#define KMAX TEST_SOURCES

#if defined(EC_ALIGNED_ADDR)
// Define power of 2 range to check ptr, len alignment
# define PTR_ALIGN_CHK_B 0
# define LEN_ALIGN_CHK_B 0	// 0 for aligned only
#else
// Define power of 2 range to check ptr, len alignment
# define PTR_ALIGN_CHK_B 32
# define LEN_ALIGN_CHK_B 32	// 0 for aligned only
#endif

typedef unsigned char u8;
|
||||
|
||||
/*
 * Hex-dump `len` bytes starting at `buf` to stdout.  A newline is emitted
 * after every 32nd byte and once more at the end.
 */
void dump(unsigned char *buf, int len)
{
	int idx = 0;

	while (idx < len) {
		printf(" %2x", 0xff & buf[idx]);
		idx++;
		// idx now counts the bytes printed so far
		if (0 == idx % 32)
			printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Hex-dump a set of buffers: for each of the first `k` pointers in `s`,
 * print its first `m` bytes on one line.  A blank line ends the dump.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int r, c;

	r = 0;
	while (r < k) {
		for (c = 0; c != m && c < m; c++)
			printf(" %2x", s[r][c]);
		printf("\n");
		r++;
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Hex-dump a flat byte array as a matrix of `k` rows by `m` columns; the
 * element for row r, column c is read from s[r * m + c].  One row is
 * printed per line and a blank line ends the dump.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row;

	for (row = 0; row < k; row++) {
		int col = 0;

		while (col < m) {
			printf(" %2x", 0xff & s[row * m + col]);
			col++;
		}
		printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, rtest, srcs, m, k, nerrs, r, err;
|
||||
void *buf;
|
||||
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||
u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
|
||||
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
|
||||
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||
|
||||
int align, size;
|
||||
unsigned char *efence_buffs[TEST_SOURCES];
|
||||
unsigned int offset;
|
||||
u8 *ubuffs[TEST_SOURCES];
|
||||
u8 *udest_ptr;
|
||||
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
temp_buff = buf;
|
||||
|
||||
// Test of all zeros
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
|
||||
memset(dest, 0, TEST_LEN);
|
||||
memset(temp_buff, 0, TEST_LEN);
|
||||
memset(dest_ref, 0, TEST_LEN);
|
||||
memset(g, 0, TEST_SOURCES);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " \n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
} else
|
||||
putchar('.');
|
||||
|
||||
// Rand data test
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 1\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Rand data test with varied parameters
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n");
|
||||
dump_matrix(buffs, 5, srcs);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 5);
|
||||
printf("dprod:");
|
||||
dump(dest, 5);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
}
|
||||
|
||||
// Test erasure code using gf_vect_dot_prod
|
||||
|
||||
// Pick a first test
|
||||
m = 9;
|
||||
k = 5;
|
||||
if (m > MMAX || k > KMAX)
|
||||
return -1;
|
||||
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// Make parity vects
|
||||
for (i = k; i < m; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||
#ifndef USEREF
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||
#else
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Random buffers in erasure
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err)
|
||||
src_err_list[nerrs++] = i;
|
||||
}
|
||||
|
||||
// construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
|
||||
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||
printf("BAD MATRIX\n");
|
||||
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
recov[i] = buffs[r];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||
#ifndef USEREF
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||
#else
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
||||
#endif
|
||||
|
||||
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buff, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Do more random tests
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
while ((m = (rand() % MMAX)) < 2) ;
|
||||
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||
|
||||
if (m > MMAX || k > KMAX)
|
||||
continue;
|
||||
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// Make parity vects
|
||||
for (i = k; i < m; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||
#ifndef USEREF
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||
#else
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err)
|
||||
src_err_list[nerrs++] = i;
|
||||
}
|
||||
if (nerrs == 0) { // should have at least one error
|
||||
while ((err = (rand() % KMAX)) >= k) ;
|
||||
src_err_list[nerrs++] = err;
|
||||
src_in_err[err] = 1;
|
||||
}
|
||||
// construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
|
||||
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||
printf("BAD MATRIX\n");
|
||||
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
recov[i] = buffs[r];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||
#ifndef USEREF
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||
#else
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
||||
#endif
|
||||
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (i = 0; i < nerrs; i++)
|
||||
printf(" %d", src_err_list[i]);
|
||||
printf("\na:\n");
|
||||
dump_u8xu8((u8 *) a, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) d, k, k);
|
||||
printf("orig data:\n");
|
||||
dump_matrix(buffs, m, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buff, 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref);
|
||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n");
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, align);
|
||||
printf("dprod:");
|
||||
dump(dest, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test rand ptr alignment if available
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||
srcs = rand() % TEST_SOURCES;
|
||||
if (srcs == 0)
|
||||
continue;
|
||||
|
||||
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||
// Add random offsets
|
||||
for (i = 0; i < srcs; i++)
|
||||
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
memset(dest, 0, TEST_LEN); // zero pad to check write-over
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
ubuffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr);
|
||||
|
||||
if (memcmp(dest_ref, udest_ptr, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(udest_ptr, 25);
|
||||
return -1;
|
||||
}
|
||||
// Confirm that padding around dests is unchanged
|
||||
memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||
offset = udest_ptr - dest;
|
||||
|
||||
if (memcmp(dest, dest_ref, offset)) {
|
||||
printf("Fail rand ualign pad start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test all size alignment
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
|
||||
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||
srcs = TEST_SOURCES;
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest);
|
||||
|
||||
if (memcmp(dest_ref, dest, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
printf("done all: Pass\n");
|
||||
return 0;
|
||||
}
|
198
erasure/src/gf-vect-dot-prod-avx.asm
Normal file
198
erasure/src/gf-vect-dot-prod-avx.asm
Normal file
@ -0,0 +1,198 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_vect_dot_prod_avx(len, vec, *g_tbls, **buffs, *dest);
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
;;; ---- Register assignment per output format ----------------------------

%ifidn __OUTPUT_FORMAT__, elf64
 ;; All five arguments arrive in registers; nothing needs saving.
 %define arg0  rdi
 %define arg1  rsi
 %define arg2  rdx
 %define arg3  rcx
 %define arg4  r8
 %define arg5  r9

 %define tmp   r11
 %define tmp2  r10
 %define tmp3  r9
 %define return rax
 %define PS 8				; pointer size in bytes
 %define func(x) x:
 %define FUNC_SAVE
 %define FUNC_RESTORE
%endif

%ifidn __OUTPUT_FORMAT__, win64
 ;; Four arguments arrive in registers; the fifth is loaded from the
 ;; stack into r12 by FUNC_SAVE.  r12 and rdi are pushed on entry and
 ;; popped again by FUNC_RESTORE.
 %define arg0   rcx
 %define arg1   rdx
 %define arg2   r8
 %define arg3   r9

 %define arg4   r12 		; must be saved and loaded
 %define tmp    r11
 %define tmp2   r10
 %define tmp3   rdi 		; must be saved and loaded
 %define return rax
 %define PS 8				; pointer size in bytes
 %define frame_size 2*8			; two pushed registers
 %define arg(x)      [rsp + frame_size + PS + PS*x]

 %define func(x) proc_frame x
 %macro FUNC_SAVE 0
	rex_push_reg	r12
	push_reg	rdi
	end_prolog
	mov	arg4, arg(4)
 %endmacro

 %macro FUNC_RESTORE 0
	pop	rdi
	pop	r12
 %endmacro
%endif

;;; ---- Symbolic names for arguments and scratch registers ---------------

%define len   arg0		; byte count; holds (length - 16) after entry
%define vec   arg1		; number of source vectors
%define mul_array arg2		; lookup tables, 32 bytes per source
%define src   arg3		; array of source pointers
%define dest  arg4		; output buffer

%define vec_i tmp2		; index of the current source
%define ptr   tmp3		; pointer to the current source
%define pos   return		; byte offset of the current 16-byte chunk

;;; ---- Load/store flavour -----------------------------------------------

%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
 %define XLDR vmovdqu
 %define XSTR vmovdqu
%else
;;; Use Non-temporal load/stor
 %ifdef NO_NT_LDST
  %define XLDR vmovdqa
  %define XSTR vmovdqa
 %else
  %define XLDR vmovntdqa
  %define XSTR vmovntdq
 %endif
%endif


default rel

[bits 64]
section .text

%define xnib_mask xmm5		; 0x0f in every byte
%define xtbl_lo   xmm4		; table indexed by the low nibble
%define xtbl_hi   xmm3		; table indexed by the high nibble

%define xsrc      xmm0		; source bytes, later their high nibbles
%define xlo_idx   xmm1		; low nibbles of the source bytes
%define xacc      xmm2		; running XOR of the partial products

align 16
global gf_vect_dot_prod_avx:function
func(gf_vect_dot_prod_avx)
	FUNC_SAVE
	sub	len, 16				;len becomes offset of the last full chunk
	jl	.done_err			;fewer than 16 bytes: report failure
	xor	pos, pos
	vmovdqa	xnib_mask, [mask0f]		;Load mask of lower nibble in each byte

.chunk_loop:
	vpxor	xacc, xacc			;clear accumulator for this chunk
	mov	tmp, mul_array			;restart at the first table pair
	xor	vec_i, vec_i

.src_loop:
	mov	ptr, [src+vec_i*PS]
	vmovdqu	xtbl_lo, [tmp]			;Load array Cx{00}, Cx{01}, ..., Cx{0f}
	vmovdqu	xtbl_hi, [tmp+16]		;     "     Cx{00}, Cx{10}, ..., Cx{f0}
	XLDR	xsrc, [ptr+pos]			;Get next source vector
	add	vec_i, 1
	add	tmp, 32				;advance to the next source's tables

	vpand	xlo_idx, xsrc, xnib_mask	;Keep low nibble of each source byte
	vpsraw	xsrc, xsrc, 4			;Shift high nibble down to the low four bits
	vpand	xsrc, xsrc, xnib_mask		;Mask off everything but that nibble

	vpshufb	xtbl_lo, xtbl_lo, xlo_idx	;Lookup mul table of low nibble
	vpshufb	xtbl_hi, xtbl_hi, xsrc		;Lookup mul table of high nibble
	vpxor	xtbl_hi, xtbl_hi, xtbl_lo	;GF add high and low partials
	vpxor	xacc, xacc, xtbl_hi		;xacc += partial
	cmp	vec_i, vec
	jl	.src_loop

	XSTR	[dest+pos], xacc
	add	pos, 16				;Loop on 16 bytes at a time
	cmp	pos, len
	jle	.chunk_loop

	lea	tmp, [len + 16]			;tmp = original length
	cmp	pos, tmp
	je	.done_ok			;length was a multiple of 16

	;; Tail len
	mov	pos, len			;Overlapped offset length-16
	jmp	.chunk_loop			;Do one more overlap pass

.done_ok:
	mov	return, 0
	FUNC_RESTORE
	ret

.done_err:
	mov	return, 1
	FUNC_RESTORE
	ret

endproc_frame

section .data

align 16

poly:
mask0f:
ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f

;;; Emit two global labels, <name>_slver and <name>_slver_<ver><snum>,
;;; both pointing at a 4-byte version record.
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
	dw 0x%4
	db 0x%3, 0x%2
%endmacro
;;;       func                 core, ver, snum
slversion gf_vect_dot_prod_avx, 02,  03,  0061
|
203
erasure/src/gf-vect-dot-prod-avx2.asm
Normal file
203
erasure/src/gf-vect-dot-prod-avx2.asm
Normal file
@ -0,0 +1,203 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, *dest);
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
;;; Register map used when the output format is elf64.
;;; arg0-arg5 name the registers that carry the integer arguments; tmp* and
;;; return are the working registers of the routine below.
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9 ; same register as arg5; the routine below only uses arg0-arg4
|
||||
%define return rax
|
||||
%define PS 8 ; size in bytes of one pointer slot
|
||||
%define func(x) x: ; function entry is a plain label in this branch
|
||||
%define FUNC_SAVE ; expands to nothing in this branch
|
||||
%define FUNC_RESTORE ; expands to nothing in this branch
|
||||
%endif
|
||||
|
||||
;;; Register map used when the output format is win64.
;;; Only arg0-arg3 arrive in registers here; the fifth argument is read from
;;; the stack into r12 by FUNC_SAVE.
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved and loaded
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 rdi ; must be saved and loaded
|
||||
%define return rax
|
||||
%define PS 8 ; size in bytes of one pointer slot
|
||||
%define frame_size 2*8 ; bytes pushed by FUNC_SAVE (two 8-byte registers)
|
||||
%define arg(x) [rsp + frame_size + PS + PS*x] ; stack slot of argument x once FUNC_SAVE has run; the extra PS presumably skips the return address - TODO confirm
|
||||
|
||||
%define func(x) proc_frame x
|
||||
;;; Prologue: save r12 and rdi, then fetch the fifth argument from the stack.
;;; NOTE(review): rex_push_reg, push_reg and end_prolog are not defined in the
;;; visible part of this file; presumably assembler-provided frame macros.
%macro FUNC_SAVE 0
|
||||
rex_push_reg r12
|
||||
push_reg rdi
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
;;; Epilogue: restore the saved registers in reverse push order.
%macro FUNC_RESTORE 0
|
||||
pop rdi
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
|
||||
;;; Symbolic names for the arguments of
;;; gf_vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, *dest), in order.
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest arg4
|
||||
|
||||
;;; Working registers of the routine
%define vec_i tmp2 ; index of the source buffer being processed
|
||||
%define ptr tmp3 ; pointer loaded from src[vec_i]
|
||||
%define pos return ; byte offset into the buffers; shares rax with the return value
|
||||
|
||||
;;; Select the load (XLDR) and store (XSTR) instructions used for the source
;;; and destination buffers.
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use unaligned load/store
|
||||
%define XLDR vmovdqu
|
||||
%define XSTR vmovdqu
|
||||
%else
|
||||
;;; EC_ALIGNED_ADDR is defined: use aligned moves when NO_NT_LDST is defined, otherwise non-temporal load/store
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR vmovdqa
|
||||
%define XSTR vmovdqa
|
||||
%else
|
||||
%define XLDR vmovntdqa
|
||||
%define XSTR vmovntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
;;; Assembler setup: RIP-relative addressing by default, 64-bit code, text section.
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
;;; Vector register roles in gf_vect_dot_prod_avx2
%define xmask0f ymm3 ; 0x0f in every byte
|
||||
%define xmask0fx xmm3 ; low 128 bits of xmask0f, used while building the mask
|
||||
%define xgft_lo ymm4 ; table half indexed by the low nibble
|
||||
%define xgft_hi ymm5 ; table half indexed by the high nibble
|
||||
|
||||
%define x0 ymm0 ; 32 source bytes, later their high nibbles
|
||||
%define xtmpa ymm1 ; low nibbles of the source bytes
|
||||
%define xp ymm2 ; running XOR of the per-source products
|
||||
|
||||
;;; gf_vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, *dest)
;;;
;;;   len       (arg0)  number of bytes to produce; must be at least 32
;;;   vec       (arg1)  number of source buffers
;;;   mul_array (arg2)  lookup tables, 32 bytes per source: 16 entries indexed
;;;                     by the low nibble followed by 16 entries indexed by
;;;                     the high nibble
;;;   src       (arg3)  array of pointers to the source buffers
;;;   dest      (arg4)  output buffer
;;;
;;; For each 32-byte slice, every source byte is split into its two nibbles,
;;; each nibble selects an entry from that source's table with vpshufb, and the
;;; two lookups are XORed together and into the accumulator that is stored to
;;; dest.  When len is not a multiple of 32, one extra pass is run over the
;;; last 32 bytes, overlapping the slice already written.
;;;
;;; Returns 0 in rax on success, or 1 (without writing dest) when len < 32.
;;; The source loop is tested at the bottom, so one source is always read;
;;; vec is presumably expected to be >= 1 - confirm against callers.
align 16
|
||||
global gf_vect_dot_prod_avx2:function
|
||||
func(gf_vect_dot_prod_avx2)
|
||||
FUNC_SAVE
|
||||
sub len, 32 ;len now holds the last offset at which a full 32-byte pass fits
|
||||
jl .return_fail ;Fewer than 32 bytes requested
|
||||
xor pos, pos ;pos = byte offset into every buffer
|
||||
mov tmp.b, 0x0f
|
||||
vpinsrb xmask0fx, xmask0fx, tmp.w, 0 ;Put 0x0f into byte 0 of the xmm register
|
||||
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||
|
||||
;;; Outer loop: one 32-byte slice of dest per iteration
.loop32:
|
||||
vpxor xp, xp ;Clear the accumulator
|
||||
mov tmp, mul_array ;Restart at the first source's table
|
||||
xor vec_i, vec_i
|
||||
|
||||
;;; Inner loop: fold one source buffer into the accumulator
.next_vect:
|
||||
mov ptr, [src+vec_i*PS] ;ptr = src[vec_i]
|
||||
|
||||
vmovdqu xgft_lo, [tmp] ;Load 32-byte table: Cx{00}, Cx{01}, Cx{02}, ... in the low half
|
||||
; and Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0} in the high half
|
||||
vperm2i128 xgft_hi, xgft_lo, xgft_lo, 0x11 ; swapped to hi | hi
|
||||
vperm2i128 xgft_lo, xgft_lo, xgft_lo, 0x00 ; swapped to lo | lo
|
||||
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
add tmp, 32 ;Advance to the next source's table
|
||||
add vec_i, 1
|
||||
|
||||
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0
|
||||
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0 (bits shifted in from above are masked off next)
|
||||
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||
|
||||
vpshufb xgft_hi, xgft_hi, x0 ;Lookup mul table of high nibble
|
||||
vpshufb xgft_lo, xgft_lo, xtmpa ;Lookup mul table of low nibble
|
||||
vpxor xgft_hi, xgft_hi, xgft_lo ;GF add high and low partials
|
||||
vpxor xp, xp, xgft_hi ;xp += partial
|
||||
cmp vec_i, vec
|
||||
jl .next_vect ;More sources to fold in
|
||||
|
||||
XSTR [dest+pos], xp
|
||||
add pos, 32 ;Loop on 32 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop32 ;Another full 32-byte slice still fits
|
||||
|
||||
lea tmp, [len + 32] ;tmp = the length originally requested
|
||||
cmp pos, tmp
|
||||
je .return_pass ;Whole length covered exactly
|
||||
|
||||
;; Tail len
|
||||
mov pos, len ;Overlapped offset length-32
|
||||
jmp .loop32 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
;;; slversion func, core, ver, snum
;;; Emits a 4-byte version record and exports it under two global labels that
;;; share one address: <func>_slver and <func>_slver_<core><ver><snum>.
;;; Record layout: 16-bit word 0x<snum>, then the bytes 0x<ver> and 0x<core>.
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; Arguments below are: func, core, ver, snum
|
||||
slversion gf_vect_dot_prod_avx2, 04, 03, 0190
|
290
erasure/src/gf-vect-dot-prod-base-test.c
Normal file
290
erasure/src/gf-vect-dot-prod-base-test.c
Normal file
@ -0,0 +1,290 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/types.h"
|
||||
|
||||
// Size in bytes of every source, parity and scratch buffer
#define TEST_LEN 8192
|
||||
#define TEST_SIZE (TEST_LEN/2) // NOTE(review): not referenced by the functions in this file
|
||||
|
||||
// Number of buffers allocated; may be overridden on the compiler command line
#ifndef TEST_SOURCES
|
||||
# define TEST_SOURCES 250
|
||||
#endif
|
||||
// Number of randomized (m, k) cases run after the fixed first case
#ifndef RANDOMS
|
||||
# define RANDOMS 20
|
||||
#endif
|
||||
|
||||
// Upper bounds for m (total buffers) and k (source buffers) in a test case
#define MMAX TEST_SOURCES
|
||||
#define KMAX TEST_SOURCES
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
// Print the first len bytes of buf in hex, breaking the line after every
// 32 values and once more at the end.
void dump(unsigned char *buf, int len)
{
	int printed = 0;

	while (printed < len) {
		printf(" %2x", 0xff & buf[printed]);
		printed++;
		if ((printed % 32) == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
// Print the first m bytes of each of the k buffers in s, one buffer per line,
// followed by a blank line.
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||
|
||||
// Print a k x m byte matrix stored row by row in s, one row per line,
// followed by a blank line.
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", 0xff & s[(row * m) + col]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, rtest, m, k, nerrs, r, err;
|
||||
void *buf;
|
||||
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||
u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
|
||||
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
|
||||
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||
|
||||
printf("gf_vect_dot_prod_base: %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
temp_buff = buf;
|
||||
|
||||
// Init
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
|
||||
memset(dest, 0, TEST_LEN);
|
||||
memset(temp_buff, 0, TEST_LEN);
|
||||
memset(dest_ref, 0, TEST_LEN);
|
||||
memset(g, 0, TEST_SOURCES);
|
||||
|
||||
// Test erasure code using gf_vect_dot_prod
|
||||
// Pick a first test
|
||||
m = 9;
|
||||
k = 5;
|
||||
if (m > MMAX || k > KMAX)
|
||||
return -1;
|
||||
|
||||
gf_gen_cauchy1_matrix(a, m, k);
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// Make parity vects
|
||||
for (i = k; i < m; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||
}
|
||||
|
||||
// Random buffers in erasure
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err)
|
||||
src_err_list[nerrs++] = i;
|
||||
}
|
||||
|
||||
// construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
|
||||
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||
printf("BAD MATRIX\n");
|
||||
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
recov[i] = buffs[r];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||
|
||||
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buff, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Do more random tests
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
while ((m = (rand() % MMAX)) < 2) ;
|
||||
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||
|
||||
if (m > MMAX || k > KMAX)
|
||||
continue;
|
||||
|
||||
gf_gen_cauchy1_matrix(a, m, k);
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// Make parity vects
|
||||
for (i = k; i < m; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||
}
|
||||
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err)
|
||||
src_err_list[nerrs++] = i;
|
||||
}
|
||||
if (nerrs == 0) { // should have at least one error
|
||||
while ((err = (rand() % KMAX)) >= k) ;
|
||||
src_err_list[nerrs++] = err;
|
||||
src_in_err[err] = 1;
|
||||
}
|
||||
// construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
|
||||
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||
printf("BAD MATRIX\n");
|
||||
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
recov[i] = buffs[r];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||
|
||||
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (i = 0; i < nerrs; i++)
|
||||
printf(" %d", src_err_list[i]);
|
||||
printf("\na:\n");
|
||||
dump_u8xu8((u8 *) a, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) d, k, k);
|
||||
printf("orig data:\n");
|
||||
dump_matrix(buffs, m, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buff, 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
printf("done all: Pass\n");
|
||||
return 0;
|
||||
}
|
184
erasure/src/gf-vect-dot-prod-perf.c
Normal file
184
erasure/src/gf-vect-dot-prod-perf.c
Normal file
@ -0,0 +1,184 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/tests.h"
|
||||
|
||||
// Routine whose throughput is measured; may be overridden on the compiler
// command line.
#ifndef FUNCTION_UNDER_TEST
# define FUNCTION_UNDER_TEST gf_vect_dot_prod
#endif

// xstr(X) expands X first and then turns the result into a string literal.
#define str(s) #s
#define xstr(s) str(s)

// Benchmark configuration.  Expression-valued macros are parenthesized so
// they expand safely inside larger expressions.
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES 10
# define TEST_LEN     (8*1024)
# define TEST_LOOPS   40000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES 10
#  define GT_L3_CACHE  (32*1024*1024)	/* some number > last level cache */
// Per-buffer length, rounded down to a multiple of 64 bytes
#  define TEST_LEN     ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
#  define TEST_LOOPS   100
#  define TEST_TYPE_STR "_cold"
# else
// Custom test: TEST_SOURCES and TEST_LEN are not defined in this branch and
// have to be supplied by the build; TEST_LOOPS falls back to 1000.
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#   define TEST_LOOPS 1000
#  endif
# endif
#endif

typedef unsigned char u8;
|
||||
|
||||
// Print the first len bytes of buf in hex, breaking the line after every
// 32 values and once more at the end.
void dump(unsigned char *buf, int len)
{
	int printed = 0;

	while (printed < len) {
		printf(" %2x", 0xff & buf[printed]);
		printed++;
		if ((printed % 32) == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
// Print the first m bytes of each of the k buffers in s, one buffer per line,
// followed by a blank line.
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j;
|
||||
void *buf;
|
||||
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref;
|
||||
u8 *temp_buff, *buffs[TEST_SOURCES];
|
||||
struct perf start, stop;
|
||||
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
temp_buff = buf;
|
||||
|
||||
// Performance test
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
memset(dest, 0, TEST_LEN);
|
||||
memset(temp_buff, 0, TEST_LEN);
|
||||
memset(dest_ref, 0, TEST_LEN);
|
||||
memset(g, 0, TEST_SOURCES);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (j = 0; j < TEST_SOURCES; j++)
|
||||
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||
|
||||
#ifdef DO_REF_PERF
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS; i++) {
|
||||
for (j = 0; j < TEST_SOURCES; j++)
|
||||
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i);
|
||||
#endif
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS; i++) {
|
||||
for (j = 0; j < TEST_SOURCES; j++)
|
||||
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("pass perf check\n");
|
||||
return 0;
|
||||
}
|
184
erasure/src/gf-vect-dot-prod-sse-perf.c
Normal file
184
erasure/src/gf-vect-dot-prod-sse-perf.c
Normal file
@ -0,0 +1,184 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/tests.h"
|
||||
|
||||
// Routine whose throughput is measured; may be overridden on the compiler
// command line.
#ifndef FUNCTION_UNDER_TEST
# define FUNCTION_UNDER_TEST gf_vect_dot_prod_sse
#endif

// xstr(X) expands X first and then turns the result into a string literal.
#define str(s) #s
#define xstr(s) str(s)

// Benchmark configuration.  Expression-valued macros are parenthesized so
// they expand safely inside larger expressions.
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES 10
# define TEST_LEN     (8*1024)
# define TEST_LOOPS   40000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES 10
#  define GT_L3_CACHE  (32*1024*1024)	/* some number > last level cache */
// Per-buffer length, rounded down to a multiple of 64 bytes
#  define TEST_LEN     ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
#  define TEST_LOOPS   100
#  define TEST_TYPE_STR "_cold"
# else
// Custom test: TEST_SOURCES and TEST_LEN are not defined in this branch and
// have to be supplied by the build; TEST_LOOPS falls back to 1000.
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#   define TEST_LOOPS 1000
#  endif
# endif
#endif

typedef unsigned char u8;
|
||||
|
||||
// Print the first len bytes of buf in hex, breaking the line after every
// 32 values and once more at the end.
void dump(unsigned char *buf, int len)
{
	int printed = 0;

	while (printed < len) {
		printf(" %2x", 0xff & buf[printed]);
		printed++;
		if ((printed % 32) == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
// Print the first m bytes of each of the k buffers in s, one buffer per line,
// followed by a blank line.
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j;
|
||||
void *buf;
|
||||
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref;
|
||||
u8 *temp_buff, *buffs[TEST_SOURCES];
|
||||
struct perf start, stop;
|
||||
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
temp_buff = buf;
|
||||
|
||||
// Performance test
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
memset(dest, 0, TEST_LEN);
|
||||
memset(temp_buff, 0, TEST_LEN);
|
||||
memset(dest_ref, 0, TEST_LEN);
|
||||
memset(g, 0, TEST_SOURCES);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (j = 0; j < TEST_SOURCES; j++)
|
||||
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||
|
||||
#ifdef DO_REF_PERF
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS; i++) {
|
||||
for (j = 0; j < TEST_SOURCES; j++)
|
||||
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i);
|
||||
#endif
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS; i++) {
|
||||
for (j = 0; j < TEST_SOURCES; j++)
|
||||
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("pass perf check\n");
|
||||
return 0;
|
||||
}
|
525
erasure/src/gf-vect-dot-prod-sse-test.c
Normal file
525
erasure/src/gf-vect-dot-prod-sse-test.c
Normal file
@ -0,0 +1,525 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/types.h"
|
||||
|
||||
// Routine under test; may be overridden on the compiler command line
#ifndef FUNCTION_UNDER_TEST
|
||||
# define FUNCTION_UNDER_TEST gf_vect_dot_prod_sse
|
||||
#endif
|
||||
// NOTE(review): presumably the shortest length exercised; its use is not visible in this part of the file
#ifndef TEST_MIN_SIZE
|
||||
# define TEST_MIN_SIZE 16
|
||||
#endif
|
||||
|
||||
// xstr(X) expands X first and then turns the result into a string literal
#define str(s) #s
|
||||
#define xstr(s) str(s)
|
||||
|
||||
// Size in bytes of every test buffer
#define TEST_LEN 8192
|
||||
#define TEST_SIZE (TEST_LEN/2)
|
||||
|
||||
// Number of buffers allocated; may be overridden on the compiler command line
#ifndef TEST_SOURCES
|
||||
# define TEST_SOURCES 16
|
||||
#endif
|
||||
// NOTE(review): presumably the number of randomized cases; the loop using it is outside this part of the file
#ifndef RANDOMS
|
||||
# define RANDOMS 20
|
||||
#endif
|
||||
|
||||
// Sizes of the matrices a, b and d declared in main (MMAX * KMAX bytes each)
#define MMAX TEST_SOURCES
|
||||
#define KMAX TEST_SOURCES
|
||||
|
||||
#ifdef EC_ALIGNED_ADDR
|
||||
// Define power of 2 range to check ptr, len alignment
|
||||
# define PTR_ALIGN_CHK_B 0
|
||||
# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
|
||||
#else
|
||||
// Define power of 2 range to check ptr, len alignment
|
||||
# define PTR_ALIGN_CHK_B 32
|
||||
# define LEN_ALIGN_CHK_B 32 // 0 for aligned only
|
||||
#endif
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
// Print the first len bytes of buf in hex, breaking the line after every
// 32 values and once more at the end.
void dump(unsigned char *buf, int len)
{
	int printed = 0;

	while (printed < len) {
		printf(" %2x", 0xff & buf[printed]);
		printed++;
		if ((printed % 32) == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
// Print the first m bytes of each of the k buffers in s, one buffer per line,
// followed by a blank line.
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||
|
||||
// Print a k x m byte matrix stored row by row in s, one row per line,
// followed by a blank line.
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", 0xff & s[(row * m) + col]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, rtest, srcs, m, k, nerrs, r, err;
|
||||
void *buf;
|
||||
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||
u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
|
||||
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
|
||||
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||
|
||||
int align, size;
|
||||
unsigned char *efence_buffs[TEST_SOURCES];
|
||||
unsigned int offset;
|
||||
u8 *ubuffs[TEST_SOURCES];
|
||||
u8 *udest_ptr;
|
||||
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
temp_buff = buf;
|
||||
|
||||
// Test of all zeros
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
|
||||
memset(dest, 0, TEST_LEN);
|
||||
memset(temp_buff, 0, TEST_LEN);
|
||||
memset(dest_ref, 0, TEST_LEN);
|
||||
memset(g, 0, TEST_SOURCES);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " \n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
} else
|
||||
putchar('.');
|
||||
|
||||
// Rand data test
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 1\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Rand data test with varied parameters
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n");
|
||||
dump_matrix(buffs, 5, srcs);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 5);
|
||||
printf("dprod:");
|
||||
dump(dest, 5);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
}
|
||||
|
||||
// Test erasure code using gf_vect_dot_prod
|
||||
|
||||
// Pick a first test
|
||||
m = 9;
|
||||
k = 5;
|
||||
if (m > MMAX || k > KMAX)
|
||||
return -1;
|
||||
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// Make parity vects
|
||||
for (i = k; i < m; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||
#ifndef USEREF
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||
#else
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Random buffers in erasure
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err)
|
||||
src_err_list[nerrs++] = i;
|
||||
}
|
||||
|
||||
// construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
|
||||
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||
printf("BAD MATRIX\n");
|
||||
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
recov[i] = buffs[r];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||
#ifndef USEREF
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||
#else
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
||||
#endif
|
||||
|
||||
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buff, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Do more random tests
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
while ((m = (rand() % MMAX)) < 2) ;
|
||||
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||
|
||||
if (m > MMAX || k > KMAX)
|
||||
continue;
|
||||
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// Make parity vects
|
||||
for (i = k; i < m; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||
#ifndef USEREF
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||
#else
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err)
|
||||
src_err_list[nerrs++] = i;
|
||||
}
|
||||
if (nerrs == 0) { // should have at least one error
|
||||
while ((err = (rand() % KMAX)) >= k) ;
|
||||
src_err_list[nerrs++] = err;
|
||||
src_in_err[err] = 1;
|
||||
}
|
||||
// construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
|
||||
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||
printf("BAD MATRIX\n");
|
||||
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
recov[i] = buffs[r];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||
#ifndef USEREF
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||
#else
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
||||
#endif
|
||||
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (i = 0; i < nerrs; i++)
|
||||
printf(" %d", src_err_list[i]);
|
||||
printf("\na:\n");
|
||||
dump_u8xu8((u8 *) a, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) d, k, k);
|
||||
printf("orig data:\n");
|
||||
dump_matrix(buffs, m, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buff, 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref);
|
||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n");
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, align);
|
||||
printf("dprod:");
|
||||
dump(dest, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test rand ptr alignment if available
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||
srcs = rand() % TEST_SOURCES;
|
||||
if (srcs == 0)
|
||||
continue;
|
||||
|
||||
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||
// Add random offsets
|
||||
for (i = 0; i < srcs; i++)
|
||||
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
memset(dest, 0, TEST_LEN); // zero pad to check write-over
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
ubuffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr);
|
||||
|
||||
if (memcmp(dest_ref, udest_ptr, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(udest_ptr, 25);
|
||||
return -1;
|
||||
}
|
||||
// Confirm that padding around dests is unchanged
|
||||
memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||
offset = udest_ptr - dest;
|
||||
|
||||
if (memcmp(dest, dest_ref, offset)) {
|
||||
printf("Fail rand ualign pad start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test all size alignment
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
|
||||
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||
srcs = TEST_SOURCES;
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest);
|
||||
|
||||
if (memcmp(dest_ref, dest, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
printf("done all: Pass\n");
|
||||
return 0;
|
||||
}
|
195
erasure/src/gf-vect-dot-prod-sse.asm
Normal file
195
erasure/src/gf-vect-dot-prod-sse.asm
Normal file
@ -0,0 +1,195 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_vect_dot_prod_sse(len, vec, *g_tbls, **buffs, *dest);
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
|
||||
%define arg4 r12 ; must be saved and loaded
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 rdi ; must be saved and loaded
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define frame_size 2*8
|
||||
%define arg(x) [rsp + frame_size + PS + PS*x]
|
||||
|
||||
%define func(x) proc_frame x
|
||||
;;; FUNC_SAVE (win64 build): function prologue.
;;; Pushes r12 and rdi, the registers this file maps to arg4 and tmp3, then
;;; loads arg4 from the stack slot arg(4) = [rsp + frame_size + PS + PS*4].
;;; frame_size (2*8) matches the two pushes made here.
;;; NOTE(review): rex_push_reg / push_reg / end_prolog are not defined in
;;; this file; presumably assembler-provided win64 prologue macros - confirm.
%macro FUNC_SAVE 0
|
||||
rex_push_reg r12
|
||||
push_reg rdi
|
||||
end_prolog
|
||||
mov arg4, arg(4)
|
||||
%endmacro
|
||||
|
||||
;;; FUNC_RESTORE (win64 build): function epilogue.
;;; Pops rdi and r12, the reverse of the push order used by FUNC_SAVE.
%macro FUNC_RESTORE 0
|
||||
pop rdi
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
|
||||
%define len arg0
|
||||
%define vec arg1
|
||||
%define mul_array arg2
|
||||
%define src arg3
|
||||
%define dest arg4
|
||||
|
||||
%define vec_i tmp2
|
||||
%define ptr tmp3
|
||||
%define pos return
|
||||
|
||||
|
||||
%ifndef EC_ALIGNED_ADDR
|
||||
;;; Use Un-aligned load/store
|
||||
%define XLDR movdqu
|
||||
%define XSTR movdqu
|
||||
%else
|
||||
;;; Use aligned load/store: non-temporal unless NO_NT_LDST is defined
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
%endif
|
||||
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm5
|
||||
%define xgft_lo xmm4
|
||||
%define xgft_hi xmm3
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmpa xmm1
|
||||
%define xp xmm2
|
||||
|
||||
;;; gf_vect_dot_prod_sse(len, vec, *g_tbls, **buffs, *dest)
;;;
;;; Register roles (from the %defines earlier in this file):
;;;   len       = arg0  byte count to process
;;;   vec       = arg1  number of source vectors
;;;   mul_array = arg2  lookup tables, 32 bytes consumed per source vector
;;;   src       = arg3  array of source pointers (PS bytes per entry)
;;;   dest      = arg4  output buffer
;;;   pos       = rax   current byte offset into sources and dest
;;;
;;; For each 16-byte block the inner loop walks every source vector, looks
;;; up the low and high nibble of each source byte in that vector's two
;;; 16-byte tables with pshufb, and XORs the results into xp, which is then
;;; stored to dest.
;;;
;;; Returns 0 in rax on success, 1 when len is less than 16.
align 16
|
||||
global gf_vect_dot_prod_sse:function
|
||||
func(gf_vect_dot_prod_sse)
|
||||
FUNC_SAVE
|
||||
;;; From here on len holds (length - 16), the offset of the last full block
sub len, 16
|
||||
jl .return_fail
|
||||
xor pos, pos
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
|
||||
;;; Outer loop: one 16-byte block of output per pass
.loop16:
|
||||
pxor xp, xp
|
||||
mov tmp, mul_array
|
||||
xor vec_i, vec_i
|
||||
|
||||
;;; Inner loop: accumulate the contribution of source vector vec_i
.next_vect:
|
||||
mov ptr, [src+vec_i*PS]
|
||||
movdqu xgft_lo, [tmp] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||
movdqu xgft_hi, [tmp+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||
XLDR x0, [ptr+pos] ;Get next source vector
|
||||
add tmp, 32
|
||||
add vec_i, 1
|
||||
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||
psraw x0, 4 ;Shift to put high nibble into bits 3-0 (word shift; stray bits are masked next)
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||
pand xtmpa, xmask0f ;Mask low src nibble in bits 3-0
|
||||
pshufb xgft_hi, x0 ;Lookup mul table of high nibble
|
||||
pshufb xgft_lo, xtmpa ;Lookup mul table of low nibble
|
||||
pxor xgft_hi, xgft_lo ;GF add high and low partials
|
||||
pxor xp, xgft_hi ;xp += partial
|
||||
cmp vec_i, vec
|
||||
jl .next_vect
|
||||
|
||||
XSTR [dest+pos], xp
|
||||
|
||||
add pos, 16 ;Loop on 16 bytes at a time
|
||||
cmp pos, len
|
||||
jle .loop16
|
||||
|
||||
;;; len + 16 is the original length: done when pos landed exactly on it
lea tmp, [len + 16]
|
||||
cmp pos, tmp
|
||||
je .return_pass
|
||||
|
||||
;; Tail len: redo the final 16 bytes, overlapping the block already written
|
||||
mov pos, len ;Overlapped offset length-16
|
||||
jmp .loop16 ;Do one more overlap pass
|
||||
|
||||
.return_pass:
|
||||
mov return, 0
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
.return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
align 16
|
||||
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
;;; slversion name, a, b, c
;;; Exports two global labels, <name>_slver and <name>_slver_<a><b><c>, at
;;; the same address and emits four bytes of data there: the 16-bit word
;;; 0x<c> followed by the bytes 0x<b> and 0x<a>.
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_vect_dot_prod_sse, 00, 03, 0060
|
525
erasure/src/gf-vect-dot-prod-test.c
Normal file
525
erasure/src/gf-vect-dot-prod-test.c
Normal file
@ -0,0 +1,525 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset, memcmp
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/types.h"
|
||||
|
||||
#ifndef FUNCTION_UNDER_TEST
|
||||
# define FUNCTION_UNDER_TEST gf_vect_dot_prod
|
||||
#endif
|
||||
#ifndef TEST_MIN_SIZE
|
||||
# define TEST_MIN_SIZE 32
|
||||
#endif
|
||||
|
||||
#define str(s) #s
|
||||
#define xstr(s) str(s)
|
||||
|
||||
#define TEST_LEN 8192
|
||||
#define TEST_SIZE (TEST_LEN/2)
|
||||
|
||||
#ifndef TEST_SOURCES
|
||||
# define TEST_SOURCES 16
|
||||
#endif
|
||||
#ifndef RANDOMS
|
||||
# define RANDOMS 20
|
||||
#endif
|
||||
|
||||
#define MMAX TEST_SOURCES
|
||||
#define KMAX TEST_SOURCES
|
||||
|
||||
#ifdef EC_ALIGNED_ADDR
|
||||
// Define power of 2 range to check ptr, len alignment
|
||||
# define PTR_ALIGN_CHK_B 0
|
||||
# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
|
||||
#else
|
||||
// Define power of 2 range to check ptr, len alignment
|
||||
# define PTR_ALIGN_CHK_B 32
|
||||
# define LEN_ALIGN_CHK_B 32 // 0 for aligned only
|
||||
#endif
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
/*
 * Hex-dump len bytes starting at buf to stdout.
 *
 * Values are printed as " %2x", 32 per line; the output always ends with
 * a line break, so a complete final row is followed by an empty line.
 */
void dump(unsigned char *buf, int len)
{
	for (int pos = 1; pos <= len; pos++) {
		printf(" %2x", 0xff & buf[pos - 1]);
		/* pos counts the values printed so far. */
		if (pos % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Print the leading m bytes of each of the first k rows of s.
 *
 * s is an array of row pointers.  Every byte is written as " %2x"; each
 * row ends with a line break and the whole dump with one more.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		for (int col = 0; col < m; col++)
			printf(" %2x", s[row][col]);
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Print a k-row, m-column byte matrix held in one flat row-major array.
 *
 * The element at (row, col) is s[col + row * m].  Every byte is written
 * as " %2x"; each row ends with a line break and the dump with one more.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			int flat = col + (row * m);

			printf(" %2x", 0xff & s[flat]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i, j, rtest, srcs, m, k, nerrs, r, err;
|
||||
void *buf;
|
||||
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||
u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
|
||||
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
|
||||
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||
|
||||
int align, size;
|
||||
unsigned char *efence_buffs[TEST_SOURCES];
|
||||
unsigned int offset;
|
||||
u8 *ubuffs[TEST_SOURCES];
|
||||
u8 *udest_ptr;
|
||||
|
||||
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||
|
||||
// Allocate the arrays
|
||||
for (i = 0; i < TEST_SOURCES; i++) {
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
buffs[i] = buf;
|
||||
}
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
dest_ref = buf;
|
||||
|
||||
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||
printf("alloc error: Fail");
|
||||
return -1;
|
||||
}
|
||||
temp_buff = buf;
|
||||
|
||||
// Test of all zeros
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
memset(buffs[i], 0, TEST_LEN);
|
||||
|
||||
memset(dest, 0, TEST_LEN);
|
||||
memset(temp_buff, 0, TEST_LEN);
|
||||
memset(dest_ref, 0, TEST_LEN);
|
||||
memset(g, 0, TEST_SOURCES);
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " \n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
} else
|
||||
putchar('.');
|
||||
|
||||
// Rand data test
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 1\n");
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Rand data test with varied parameters
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref);
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n");
|
||||
dump_matrix(buffs, 5, srcs);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 5);
|
||||
printf("dprod:");
|
||||
dump(dest, 5);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
}
|
||||
|
||||
// Test erasure code using gf_vect_dot_prod
|
||||
|
||||
// Pick a first test
|
||||
m = 9;
|
||||
k = 5;
|
||||
if (m > MMAX || k > KMAX)
|
||||
return -1;
|
||||
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// Make parity vects
|
||||
for (i = k; i < m; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||
#ifndef USEREF
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||
#else
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Random buffers in erasure
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err)
|
||||
src_err_list[nerrs++] = i;
|
||||
}
|
||||
|
||||
// construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
|
||||
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||
printf("BAD MATRIX\n");
|
||||
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
recov[i] = buffs[r];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||
#ifndef USEREF
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||
#else
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
||||
#endif
|
||||
|
||||
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buff, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Do more random tests
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
while ((m = (rand() % MMAX)) < 2) ;
|
||||
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||
|
||||
if (m > MMAX || k > KMAX)
|
||||
continue;
|
||||
|
||||
gf_gen_rs_matrix(a, m, k);
|
||||
|
||||
// Make random data
|
||||
for (i = 0; i < k; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
// Make parity vects
|
||||
for (i = k; i < m; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||
#ifndef USEREF
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||
#else
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Random errors
|
||||
memset(src_in_err, 0, TEST_SOURCES);
|
||||
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||
err = 1 & rand();
|
||||
src_in_err[i] = err;
|
||||
if (err)
|
||||
src_err_list[nerrs++] = i;
|
||||
}
|
||||
if (nerrs == 0) { // should have at least one error
|
||||
while ((err = (rand() % KMAX)) >= k) ;
|
||||
src_err_list[nerrs++] = err;
|
||||
src_in_err[err] = 1;
|
||||
}
|
||||
// construct b by removing error rows
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < k; j++)
|
||||
b[k * i + j] = a[k * r + j];
|
||||
}
|
||||
|
||||
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||
printf("BAD MATRIX\n");
|
||||
|
||||
for (i = 0, r = 0; i < k; i++, r++) {
|
||||
while (src_in_err[r]) {
|
||||
r++;
|
||||
continue;
|
||||
}
|
||||
recov[i] = buffs[r];
|
||||
}
|
||||
|
||||
// Recover data
|
||||
for (i = 0; i < nerrs; i++) {
|
||||
for (j = 0; j < k; j++)
|
||||
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||
#ifndef USEREF
|
||||
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||
#else
|
||||
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
||||
#endif
|
||||
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||
printf(" - erase list = ");
|
||||
for (i = 0; i < nerrs; i++)
|
||||
printf(" %d", src_err_list[i]);
|
||||
printf("\na:\n");
|
||||
dump_u8xu8((u8 *) a, m, k);
|
||||
printf("inv b:\n");
|
||||
dump_u8xu8((u8 *) d, k, k);
|
||||
printf("orig data:\n");
|
||||
dump_matrix(buffs, m, 25);
|
||||
printf("orig :");
|
||||
dump(buffs[src_err_list[i]], 25);
|
||||
printf("recov %d:", src_err_list[i]);
|
||||
dump(temp_buff, 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
for (j = 0; j < TEST_LEN; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < TEST_SOURCES; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref);
|
||||
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest);
|
||||
|
||||
if (0 != memcmp(dest_ref, dest, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n");
|
||||
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, align);
|
||||
printf("dprod:");
|
||||
dump(dest, align);
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test rand ptr alignment if available
|
||||
|
||||
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||
srcs = rand() % TEST_SOURCES;
|
||||
if (srcs == 0)
|
||||
continue;
|
||||
|
||||
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||
// Add random offsets
|
||||
for (i = 0; i < srcs; i++)
|
||||
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||
|
||||
memset(dest, 0, TEST_LEN); // zero pad to check write-over
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
ubuffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr);
|
||||
|
||||
if (memcmp(dest_ref, udest_ptr, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n",
|
||||
srcs);
|
||||
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(udest_ptr, 25);
|
||||
return -1;
|
||||
}
|
||||
// Confirm that padding around dests is unchanged
|
||||
memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||
offset = udest_ptr - dest;
|
||||
|
||||
if (memcmp(dest, dest_ref, offset)) {
|
||||
printf("Fail rand ualign pad start\n");
|
||||
return -1;
|
||||
}
|
||||
if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) {
|
||||
printf("Fail rand ualign pad end\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Test all size alignment
|
||||
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||
|
||||
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||
srcs = TEST_SOURCES;
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
for (j = 0; j < size; j++)
|
||||
buffs[i][j] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
g[i] = rand();
|
||||
|
||||
for (i = 0; i < srcs; i++)
|
||||
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||
|
||||
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref);
|
||||
|
||||
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest);
|
||||
|
||||
if (memcmp(dest_ref, dest, size)) {
|
||||
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign len=%d\n",
|
||||
size);
|
||||
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||
printf("dprod_base:");
|
||||
dump(dest_ref, 25);
|
||||
printf("dprod:");
|
||||
dump(dest, 25);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
printf("done all: Pass\n");
|
||||
return 0;
|
||||
}
|
99
erasure/src/gf-vect-mul-avx-perf.c
Normal file
99
erasure/src/gf-vect-mul-avx-perf.c
Normal file
@ -0,0 +1,99 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/tests.h"
|
||||
|
||||
/*
 * Benchmark sizing.
 *
 * Define CACHED_TEST for a small buffer looped many times ("_warm"), or
 * TEST_CUSTOM to supply TEST_LEN (and optionally TEST_LOOPS) from the
 * build line ("_cus").  With neither defined the buffer is half of
 * GT_L3_CACHE ("_cold").
 *
 * Every arithmetic replacement list is parenthesized so the macros expand
 * as a single value inside larger expressions (e.g. `x % TEST_LEN`).
 */
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_LEN      (8*1024)
# define TEST_LOOPS    4000000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES  10
#  define GT_L3_CACHE   (32*1024*1024)	/* some number > last level cache */
#  define TEST_LEN      (GT_L3_CACHE / 2)
#  define TEST_LOOPS    1000
#  define TEST_TYPE_STR "_cold"
# else
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#   define TEST_LOOPS   1000
#  endif
# endif
#endif

#define TEST_MEM (2 * TEST_LEN)

typedef unsigned char u8;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i;
|
||||
u8 *buff1, *buff2, gf_const_tbl[64], a = 2;
|
||||
struct perf start, stop;
|
||||
|
||||
printf("gf_vect_mul_avx_perf:\n");
|
||||
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
|
||||
// Allocate large mem region
|
||||
buff1 = (u8 *) malloc(TEST_LEN);
|
||||
buff2 = (u8 *) malloc(TEST_LEN);
|
||||
if (NULL == buff1 || NULL == buff2) {
|
||||
printf("Failed to allocate %dB\n", TEST_LEN);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(buff1, 0, TEST_LEN);
|
||||
memset(buff2, 0, TEST_LEN);
|
||||
|
||||
gf_vect_mul_avx(TEST_LEN, gf_const_tbl, buff1, buff2);
|
||||
|
||||
printf("Start timed tests\n");
|
||||
fflush(0);
|
||||
|
||||
gf_vect_mul_avx(TEST_LEN, gf_const_tbl, buff1, buff2);
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS; i++) {
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
gf_vect_mul_avx(TEST_LEN, gf_const_tbl, buff1, buff2);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf("gf_vect_mul_avx" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * i);
|
||||
|
||||
return 0;
|
||||
}
|
143
erasure/src/gf-vect-mul-avx-test.c
Normal file
143
erasure/src/gf-vect-mul-avx-test.c
Normal file
@ -0,0 +1,143 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset
|
||||
#include "erasure-code.h"
|
||||
|
||||
typedef unsigned char u8;	// unsigned byte, the element type of every test buffer

// Length in bytes of each buffer handed to malloc() in main().
#define TEST_SIZE     8192

// The remaining settings are not referenced by main() in this file.
#define TEST_MEM      TEST_SIZE
#define TEST_LOOPS    100000
#define TEST_TYPE_STR ""
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i;
|
||||
u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
|
||||
int align, size;
|
||||
unsigned char *efence_buff1;
|
||||
unsigned char *efence_buff2;
|
||||
unsigned char *efence_buff3;
|
||||
|
||||
printf("gf_vect_mul_avx:\n");
|
||||
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
|
||||
buff1 = (u8 *) malloc(TEST_SIZE);
|
||||
buff2 = (u8 *) malloc(TEST_SIZE);
|
||||
buff3 = (u8 *) malloc(TEST_SIZE);
|
||||
|
||||
if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
|
||||
printf("buffer alloc error\n");
|
||||
return -1;
|
||||
}
|
||||
// Fill with rand data
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
buff1[i] = rand();
|
||||
|
||||
gf_vect_mul_avx(TEST_SIZE, gf_const_tbl, buff1, buff2);
|
||||
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
|
||||
gf_mul(2, buff1[i]));
|
||||
return 1;
|
||||
}
|
||||
|
||||
gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3);
|
||||
|
||||
// Check reference function
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
if (buff2[i] != buff3[i]) {
|
||||
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
|
||||
i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
buff1[i] = rand();
|
||||
|
||||
// Check each possible constant
|
||||
printf("Random tests ");
|
||||
for (a = 0; a != 255; a++) {
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
gf_vect_mul_avx(TEST_SIZE, gf_const_tbl, buff1, buff2);
|
||||
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
|
||||
i, a, buff1[i], buff2[i], gf_mul(2, buff1[i]));
|
||||
return 1;
|
||||
}
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = 32;
|
||||
a = 2;
|
||||
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
for (size = 0; size < TEST_SIZE; size += align) {
|
||||
// Line up TEST_SIZE from end
|
||||
efence_buff1 = buff1 + size;
|
||||
efence_buff2 = buff2 + size;
|
||||
efence_buff3 = buff3 + size;
|
||||
|
||||
gf_vect_mul_avx(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
|
||||
|
||||
for (i = 0; i < TEST_SIZE - size; i++)
|
||||
if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
|
||||
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
|
||||
i, efence_buff1[i], efence_buff2[i], gf_mul(2,
|
||||
efence_buff1
|
||||
[i]));
|
||||
return 1;
|
||||
}
|
||||
|
||||
gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3);
|
||||
|
||||
// Check reference function
|
||||
for (i = 0; i < TEST_SIZE - size; i++)
|
||||
if (efence_buff2[i] != efence_buff3[i]) {
|
||||
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
|
||||
i, a, efence_buff2[i], efence_buff3[i], gf_mul(2,
|
||||
efence_buff1
|
||||
[i]));
|
||||
return 1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
printf(" done: Pass\n");
|
||||
return 0;
|
||||
}
|
172
erasure/src/gf-vect-mul-avx.asm
Normal file
172
erasure/src/gf-vect-mul-avx.asm
Normal file
@ -0,0 +1,172 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_vect_mul_avx(len, mul_array, src, dest)
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
;;; Calling-convention glue.  Each output format maps argN onto its integer
;;; argument registers and supplies the FUNC_SAVE / FUNC_RESTORE pair used
;;; around the routine body.

%ifidn __OUTPUT_FORMAT__, elf64
	;; elf64: arguments in rdi, rsi, rdx, rcx, r8, r9.
	;; FUNC_SAVE / FUNC_RESTORE expand to nothing here.
	%define arg0		rdi
	%define arg1		rsi
	%define arg2		rdx
	%define arg3		rcx
	%define arg4		r8
	%define arg5		r9
	%define tmp		r11
	%define return		rax
	%define func(x)		x:
	%define FUNC_SAVE
	%define FUNC_RESTORE

%elifidn __OUTPUT_FORMAT__, win64
	;; win64: arguments in rcx, rdx, r8, r9.
	%define arg0		rcx
	%define arg1		rdx
	%define arg2		r8
	%define arg3		r9
	%define return		rax
	%define stack_size	5*16 + 8	; must be an odd multiple of 8
	%define func(x)		proc_frame x

	;; Reserve stack_size bytes and spill xmm6, xmm7, xmm13, xmm14, xmm15
	;; into five 16-byte slots.
	%macro FUNC_SAVE 0
		alloc_stack	stack_size
		save_xmm128	xmm6, 0*16
		save_xmm128	xmm7, 1*16
		save_xmm128	xmm13, 2*16
		save_xmm128	xmm14, 3*16
		save_xmm128	xmm15, 4*16
		end_prolog
	%endmacro

	;; Reload the five spilled registers from the same slots and release
	;; the stack area.
	%macro FUNC_RESTORE 0
		vmovdqa		xmm6, [rsp + 0*16]
		vmovdqa		xmm7, [rsp + 1*16]
		vmovdqa		xmm13, [rsp + 2*16]
		vmovdqa		xmm14, [rsp + 3*16]
		vmovdqa		xmm15, [rsp + 4*16]
		add		rsp, stack_size
	%endmacro

%endif
|
||||
|
||||
|
||||
;;; Named operands of gf_vect_mul_avx(len, mul_array, src, dest).
;;; `pos` is the running byte offset; it lives in the return register.
%define len		arg0
%define mul_array	arg1
%define src		arg2
%define dest		arg3
%define pos		return

;;; Load/store flavour: non-temporal by default, plain aligned moves when
;;; NO_NT_LDST is defined.
%ifdef NO_NT_LDST
	%define XLDR	vmovdqa
	%define XSTR	vmovdqa
%else
	%define XLDR	vmovntdqa
	%define XSTR	vmovntdq
%endif

default rel

[bits 64]
section .text

;;; Registers that stay fixed for the whole call.
%define xmask0f		xmm15		; nibble mask loaded from mask0f
%define xgft_lo		xmm14		; first 16 bytes of mul_array
%define xgft_hi		xmm13		; second 16 bytes of mul_array

;;; Working registers, one set for each 16-byte half of a 32-byte step.
%define x0		xmm0
%define xtmp1a		xmm1
%define xtmp1b		xmm2
%define xtmp1c		xmm3
%define x1		xmm4
%define xtmp2a		xmm5
%define xtmp2b		xmm6
%define xtmp2c		xmm7
|
||||
|
||||
;;; gf_vect_mul_avx(len, mul_array, src, dest)
;;;
;;; For every byte of src, looks up its low nibble in mul_array[0..15] and
;;; its high nibble in mul_array[16..31], XORs the two table entries and
;;; writes the result to dest at the same offset.
;;;
;;; Data is handled in 32-byte steps and the length test comes after the
;;; first step, so at least 32 bytes are always processed.
;;; Returns pos - len in rax: 0 when len is a positive multiple of 32.
;;; src and dest are accessed with XLDR/XSTR, which are aligned-move forms.
align 16
global gf_vect_mul_avx:function
func(gf_vect_mul_avx)
	FUNC_SAVE
	mov	pos, 0
	vmovdqa	xmask0f, [mask0f]	; 0x0f in every byte lane
	vmovdqu	xgft_lo, [mul_array]	; table indexed by the low nibble
	vmovdqu	xgft_hi, [mul_array+16]	; table indexed by the high nibble

loop32:
	XLDR	x0, [src+pos]		; source bytes  0..15 of this step
	XLDR	x1, [src+pos+16]	; source bytes 16..31 of this step
	add	pos, 32			; advance one 32-byte step
	cmp	pos, len		; flags are consumed by the jl below
	vpand	xtmp1a, x0, xmask0f	; low nibble of each source byte
	vpand	xtmp2a, x1, xmask0f
	vpsraw	x0, x0, 4		; move high nibbles down by four bits
	vpsraw	x1, x1, 4
	vpand	x0, x0, xmask0f		; keep only the shifted high nibble
	vpand	x1, x1, xmask0f
	vpshufb	xtmp1b, xgft_hi, x0	; table lookup for the high nibbles
	vpshufb	xtmp1c, xgft_lo, xtmp1a	; table lookup for the low nibbles
	vpshufb	xtmp2b, xgft_hi, x1
	vpshufb	xtmp2c, xgft_lo, xtmp2a
	vpxor	xtmp1b, xtmp1b, xtmp1c	; combine the two partial products
	vpxor	xtmp2b, xtmp2b, xtmp2c
	XSTR	[dest+pos-32], xtmp1b	; pos already advanced, hence -32 / -16
	XSTR	[dest+pos-16], xtmp2b
	jl	loop32			; repeat while pos < len (signed)


return_pass:
	FUNC_RESTORE
	sub	pos, len		; rax = bytes processed - len
	ret

;; No branch in this routine targets return_fail.
return_fail:
	FUNC_RESTORE
	mov	return, 1
	ret

endproc_frame
|
||||
|
||||
section .data

align 16

;;; 0x0f in each of the 16 byte lanes; loaded into xmask0f to isolate nibbles.
mask0f:
	ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f

;;; slversion name, a, b, c
;;; Exports the labels <name>_slver and <name>_slver_<a><b><c>, both placed
;;; on the same four bytes: the word 0x<c> followed by the bytes 0x<b>, 0x<a>.
%macro slversion 4
	global %1_slver_%2%3%4
	global %1_slver
	%1_slver:
	%1_slver_%2%3%4:
		dw 0x%4
		db 0x%3, 0x%2
%endmacro

;;;       func              core, ver, snum
slversion gf_vect_mul_avx,  01,   02,  0036
|
129
erasure/src/gf-vect-mul-base-test.c
Normal file
129
erasure/src/gf-vect-mul-base-test.c
Normal file
@ -0,0 +1,129 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset
|
||||
#include "erasure-code.h"
|
||||
|
||||
typedef unsigned char u8;	// unsigned byte, the element type of every test buffer

// Length in bytes of each buffer handed to malloc() in main().
#define TEST_SIZE     8192

// The remaining settings are not referenced by main() in this file.
#define TEST_MEM      TEST_SIZE
#define TEST_LOOPS    100000
#define TEST_TYPE_STR ""
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i;
|
||||
u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
|
||||
int align, size;
|
||||
unsigned char *efence_buff1;
|
||||
unsigned char *efence_buff2;
|
||||
|
||||
printf("gf_vect_mul_base_test:\n");
|
||||
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
|
||||
buff1 = (u8 *) malloc(TEST_SIZE);
|
||||
buff2 = (u8 *) malloc(TEST_SIZE);
|
||||
buff3 = (u8 *) malloc(TEST_SIZE);
|
||||
|
||||
if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
|
||||
printf("buffer alloc error\n");
|
||||
return -1;
|
||||
}
|
||||
// Fill with rand data
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
buff1[i] = rand();
|
||||
|
||||
gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2);
|
||||
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
|
||||
gf_mul(2, buff1[i]));
|
||||
return 1;
|
||||
}
|
||||
|
||||
gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3);
|
||||
|
||||
// Check reference function
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
if (buff2[i] != buff3[i]) {
|
||||
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
|
||||
i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
buff1[i] = rand();
|
||||
|
||||
// Check each possible constant
|
||||
printf("Random tests ");
|
||||
for (a = 0; a != 255; a++) {
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2);
|
||||
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
|
||||
i, a, buff1[i], buff2[i], gf_mul(2, buff1[i]));
|
||||
return 1;
|
||||
}
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = 32;
|
||||
a = 2;
|
||||
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
for (size = 0; size < TEST_SIZE; size += align) {
|
||||
// Line up TEST_SIZE from end
|
||||
efence_buff1 = buff1 + size;
|
||||
efence_buff2 = buff2 + size;
|
||||
|
||||
gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
|
||||
|
||||
for (i = 0; i < TEST_SIZE - size; i++)
|
||||
if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
|
||||
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
|
||||
i, efence_buff1[i], efence_buff2[i], gf_mul(2,
|
||||
efence_buff1
|
||||
[i]));
|
||||
return 1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
printf(" done: Pass\n");
|
||||
return 0;
|
||||
}
|
99
erasure/src/gf-vect-mul-perf.c
Normal file
99
erasure/src/gf-vect-mul-perf.c
Normal file
@ -0,0 +1,99 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/tests.h"
|
||||
|
||||
/*
 * Benchmark sizing.
 *
 * Define CACHED_TEST for a small buffer looped many times ("_warm"), or
 * TEST_CUSTOM to supply TEST_LEN (and optionally TEST_LOOPS) from the
 * build line ("_cus").  With neither defined the buffer is half of
 * GT_L3_CACHE ("_cold").
 *
 * Every arithmetic replacement list is parenthesized so the macros expand
 * as a single value inside larger expressions (e.g. `x % TEST_LEN`).
 */
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_LEN      (8*1024)
# define TEST_LOOPS    4000000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES  10
#  define GT_L3_CACHE   (32*1024*1024)	/* some number > last level cache */
#  define TEST_LEN      (GT_L3_CACHE / 2)
#  define TEST_LOOPS    1000
#  define TEST_TYPE_STR "_cold"
# else
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#   define TEST_LOOPS   1000
#  endif
# endif
#endif

#define TEST_MEM (2 * TEST_LEN)

typedef unsigned char u8;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i;
|
||||
u8 *buff1, *buff2, gf_const_tbl[64], a = 2;
|
||||
struct perf start, stop;
|
||||
|
||||
printf("gf_vect_mul_perf:\n");
|
||||
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
|
||||
// Allocate large mem region
|
||||
buff1 = (u8 *) malloc(TEST_LEN);
|
||||
buff2 = (u8 *) malloc(TEST_LEN);
|
||||
if (NULL == buff1 || NULL == buff2) {
|
||||
printf("Failed to allocate %dB\n", TEST_LEN);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(buff1, 0, TEST_LEN);
|
||||
memset(buff2, 0, TEST_LEN);
|
||||
|
||||
gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
|
||||
|
||||
printf("Start timed tests\n");
|
||||
fflush(0);
|
||||
|
||||
gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS; i++) {
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf("gf_vect_mul" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * i);
|
||||
|
||||
return 0;
|
||||
}
|
97
erasure/src/gf-vect-mul-sse-perf.c
Normal file
97
erasure/src/gf-vect-mul-sse-perf.c
Normal file
@ -0,0 +1,97 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset
|
||||
#include "erasure-code.h"
|
||||
#include "erasure/tests.h"
|
||||
|
||||
/*
 * Benchmark sizing.
 *
 * Define CACHED_TEST for a small buffer looped many times ("_warm"), or
 * TEST_CUSTOM to supply TEST_LEN (and optionally TEST_LOOPS) from the
 * build line ("_cus").  With neither defined the buffer is half of
 * GT_L3_CACHE ("_cold").
 *
 * Every arithmetic replacement list is parenthesized so the macros expand
 * as a single value inside larger expressions (e.g. `x % TEST_LEN`).
 */
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_LEN      (8*1024)
# define TEST_LOOPS    4000000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES  10
#  define GT_L3_CACHE   (32*1024*1024)	/* some number > last level cache */
#  define TEST_LEN      (GT_L3_CACHE / 2)
#  define TEST_LOOPS    1000
#  define TEST_TYPE_STR "_cold"
# else
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#   define TEST_LOOPS   1000
#  endif
# endif
#endif

#define TEST_MEM (2 * TEST_LEN)

typedef unsigned char u8;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i;
|
||||
u8 *buff1, *buff2, gf_const_tbl[64], a = 2;
|
||||
struct perf start, stop;
|
||||
|
||||
printf("gf_vect_mul_sse_perf:\n");
|
||||
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
|
||||
// Allocate large mem region
|
||||
buff1 = (u8 *) malloc(TEST_LEN);
|
||||
buff2 = (u8 *) malloc(TEST_LEN);
|
||||
if (NULL == buff1 || NULL == buff2) {
|
||||
printf("Failed to allocate %dB\n", TEST_LEN);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(buff1, 0, TEST_LEN);
|
||||
memset(buff2, 0, TEST_LEN);
|
||||
|
||||
printf("Start timed tests\n");
|
||||
fflush(0);
|
||||
|
||||
gf_vect_mul_sse(TEST_LEN, gf_const_tbl, buff1, buff2);
|
||||
perf_start(&start);
|
||||
for (i = 0; i < TEST_LOOPS; i++) {
|
||||
gf_vect_mul_init(a, gf_const_tbl); // in a re-build would only calc once
|
||||
gf_vect_mul_sse(TEST_LEN, gf_const_tbl, buff1, buff2);
|
||||
}
|
||||
perf_stop(&stop);
|
||||
printf("gf_vect_mul_sse" TEST_TYPE_STR ": ");
|
||||
perf_print(stop, start, (long long)TEST_LEN * i);
|
||||
|
||||
return 0;
|
||||
}
|
160
erasure/src/gf-vect-mul-sse-test.c
Normal file
160
erasure/src/gf-vect-mul-sse-test.c
Normal file
@ -0,0 +1,160 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "erasure-code.h"
|
||||
|
||||
#define TEST_SIZE (128*1024)
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i;
|
||||
u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
|
||||
int tsize;
|
||||
int align, size;
|
||||
unsigned char *efence_buff1;
|
||||
unsigned char *efence_buff2;
|
||||
unsigned char *efence_buff3;
|
||||
|
||||
printf("gf_vect_mul_sse_test: ");
|
||||
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
|
||||
buff1 = (u8 *) malloc(TEST_SIZE);
|
||||
buff2 = (u8 *) malloc(TEST_SIZE);
|
||||
buff3 = (u8 *) malloc(TEST_SIZE);
|
||||
|
||||
if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
|
||||
printf("buffer alloc error\n");
|
||||
return -1;
|
||||
}
|
||||
// Fill with rand data
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
buff1[i] = rand();
|
||||
|
||||
gf_vect_mul_sse(TEST_SIZE, gf_const_tbl, buff1, buff2);
|
||||
|
||||
for (i = 0; i < TEST_SIZE; i++) {
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i,
|
||||
buff1[i], buff2[i], gf_mul(2, buff1[i]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3);
|
||||
|
||||
// Check reference function
|
||||
for (i = 0; i < TEST_SIZE; i++) {
|
||||
if (buff2[i] != buff3[i]) {
|
||||
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
|
||||
i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
buff1[i] = rand();
|
||||
|
||||
// Check each possible constant
|
||||
for (a = 0; a != 255; a++) {
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
gf_vect_mul_sse(TEST_SIZE, gf_const_tbl, buff1, buff2);
|
||||
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
|
||||
i, a, buff1[i], buff2[i], gf_mul(2, buff1[i]));
|
||||
return -1;
|
||||
}
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Check buffer len
|
||||
for (tsize = TEST_SIZE; tsize > 0; tsize -= 32) {
|
||||
a = rand();
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
gf_vect_mul_sse(tsize, gf_const_tbl, buff1, buff2);
|
||||
|
||||
for (i = 0; i < tsize; i++)
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
|
||||
i, a, buff1[i], buff2[i], gf_mul(2, buff1[i]));
|
||||
return -1;
|
||||
}
|
||||
if (0 == tsize % (32 * 8)) {
|
||||
putchar('.');
|
||||
fflush(0);
|
||||
}
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = 32;
|
||||
a = 2;
|
||||
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
for (size = 0; size < TEST_SIZE; size += align) {
|
||||
// Line up TEST_SIZE from end
|
||||
efence_buff1 = buff1 + size;
|
||||
efence_buff2 = buff2 + size;
|
||||
efence_buff3 = buff3 + size;
|
||||
|
||||
gf_vect_mul_sse(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
|
||||
|
||||
for (i = 0; i < TEST_SIZE - size; i++)
|
||||
if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
|
||||
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
|
||||
i, efence_buff1[i], efence_buff2[i], gf_mul(2,
|
||||
efence_buff1
|
||||
[i]));
|
||||
return 1;
|
||||
}
|
||||
|
||||
gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3);
|
||||
|
||||
// Check reference function
|
||||
for (i = 0; i < TEST_SIZE - size; i++)
|
||||
if (efence_buff2[i] != efence_buff3[i]) {
|
||||
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
|
||||
i, a, efence_buff2[i], efence_buff3[i], gf_mul(2,
|
||||
efence_buff1
|
||||
[i]));
|
||||
return 1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
printf(" done: Pass\n");
|
||||
fflush(0);
|
||||
return 0;
|
||||
}
|
178
erasure/src/gf-vect-mul-sse.asm
Normal file
178
erasure/src/gf-vect-mul-sse.asm
Normal file
@ -0,0 +1,178 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in
|
||||
; the documentation and/or other materials provided with the
|
||||
; distribution.
|
||||
; * Neither the name of Intel Corporation nor the names of its
|
||||
; contributors may be used to endorse or promote products derived
|
||||
; from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;;
|
||||
;;; gf_vect_mul_sse(len, mul_array, src, dest)
|
||||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
|
||||
;;; Calling-convention shim: maps arg0..argN, the return register and the
;;; prologue/epilogue macros onto the ABI selected by the output format.
;;; elf64: first four integer args in rdi, rsi, rdx, rcx; no prologue needed,
;;; so FUNC_SAVE / FUNC_RESTORE expand to nothing and func(x) is a plain label.
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
|
||||
;;; win64: first four integer args in rcx, rdx, r8, r9. func(x) opens a
;;; proc_frame, and the xmm registers this file uses from the xmm6..xmm15
;;; range (xmm6, xmm7, xmm13, xmm14, xmm15) are spilled to the stack.
%elifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
%define arg2 r8
|
||||
%define arg3 r9
|
||||
%define return rax
|
||||
; Five 16-byte save slots plus 8 bytes; presumably the extra 8 re-aligns rsp
; to 16 after the return address push so the movdqa reloads below are legal.
%define stack_size 5*16 + 8 ; must be an odd multiple of 8
|
||||
%define func(x) proc_frame x
|
||||
; Prologue: reserve stack_size bytes and save the five xmm registers at
; offsets 0, 16, 32, 48 and 64 from rsp.
%macro FUNC_SAVE 0
|
||||
alloc_stack stack_size
|
||||
save_xmm128 xmm6, 0*16
|
||||
save_xmm128 xmm7, 1*16
|
||||
save_xmm128 xmm13, 2*16
|
||||
save_xmm128 xmm14, 3*16
|
||||
save_xmm128 xmm15, 4*16
|
||||
end_prolog
|
||||
%endmacro
|
||||
|
||||
; Epilogue: reload the registers from the same slots FUNC_SAVE used, then
; release the stack area.
%macro FUNC_RESTORE 0
|
||||
movdqa xmm6, [rsp + 0*16]
|
||||
movdqa xmm7, [rsp + 1*16]
|
||||
movdqa xmm13, [rsp + 2*16]
|
||||
movdqa xmm14, [rsp + 3*16]
|
||||
movdqa xmm15, [rsp + 4*16]
|
||||
add rsp, stack_size
|
||||
%endmacro
|
||||
|
||||
%endif
|
||||
|
||||
|
||||
%define len arg0
|
||||
%define mul_array arg1
|
||||
%define src arg2
|
||||
%define dest arg3
|
||||
%define pos return
|
||||
|
||||
|
||||
;;; Use Non-temporal load/store
|
||||
%ifdef NO_NT_LDST
|
||||
%define XLDR movdqa
|
||||
%define XSTR movdqa
|
||||
%else
|
||||
%define XLDR movntdqa
|
||||
%define XSTR movntdq
|
||||
%endif
|
||||
|
||||
default rel
|
||||
|
||||
[bits 64]
|
||||
section .text
|
||||
|
||||
%define xmask0f xmm15
|
||||
%define xgft_lo xmm14
|
||||
%define xgft_hi xmm13
|
||||
|
||||
%define x0 xmm0
|
||||
%define xtmp1a xmm1
|
||||
%define xtmp1b xmm2
|
||||
%define xtmp1c xmm3
|
||||
%define x1 xmm4
|
||||
%define xtmp2a xmm5
|
||||
%define xtmp2b xmm6
|
||||
%define xtmp2c xmm7
|
||||
|
||||
|
||||
;;; gf_vect_mul_sse(len, mul_array, src, dest)
;;;   len       - byte count (arg0); consumed 32 bytes per loop iteration
;;;   mul_array - 32 bytes of lookup tables (arg1): 16 products for the low
;;;               nibble followed by 16 products for the high nibble
;;;   src, dest - input / output buffers (arg2, arg3), accessed through
;;;               XLDR / XSTR; movdqa, movntdqa and movntdq all require
;;;               16-byte aligned memory operands
;;;   returns   - rax = (bytes processed) - len, i.e. 0 when len is a
;;;               positive multiple of 32
;;; NOTE(review): the loop is bottom-tested, so 32 bytes are read and written
;;; even when len is 0, and a len that is not a multiple of 32 is rounded up.
align 16
|
||||
global gf_vect_mul_sse:function
|
||||
func(gf_vect_mul_sse)
|
||||
FUNC_SAVE
|
||||
; pos aliases the return register (rax) and is the running byte offset
mov pos, 0
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
movdqu xgft_lo, [mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||
movdqu xgft_hi, [mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||
|
||||
; Each iteration handles two independent 16-byte vectors (x0 and x1).
loop32:
|
||||
XLDR x0, [src+pos] ;Get next source vector
|
||||
XLDR x1, [src+pos+16] ;Get next source vector + 16B ahead
|
||||
; pshufb overwrites its destination, so fresh table copies are needed
movdqa xtmp1b, xgft_hi ;Reload const array registers
|
||||
movdqa xtmp1c, xgft_lo
|
||||
movdqa xtmp2b, xgft_hi
|
||||
movdqa xtmp2c, xgft_lo
|
||||
movdqa xtmp1a, x0 ;Keep unshifted copy of src
|
||||
movdqa xtmp2a, x1
|
||||
; Word-wide arithmetic shift: bits that cross byte boundaries (and the
; replicated sign bits) are removed by the pand with xmask0f below.
psraw x0, 4 ;Shift to put high nibble into bits 3-0
|
||||
psraw x1, 4
|
||||
pand xtmp1a, xmask0f ;Mask low src nibble in bits 3-0
|
||||
pand xtmp2a, xmask0f
|
||||
pand x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||
pand x1, xmask0f
|
||||
pshufb xtmp1b, x0 ;Lookup mul table of high nibble
|
||||
pshufb xtmp1c, xtmp1a ;Lookup mul table of low nibble
|
||||
pshufb xtmp2b, x1
|
||||
pshufb xtmp2c, xtmp2a
|
||||
pxor xtmp1b, xtmp1c ;GF add high and low partials
|
||||
pxor xtmp2b, xtmp2c
|
||||
XSTR [dest+pos], xtmp1b ;Store result
|
||||
XSTR [dest+pos+16], xtmp2b ;Store +16B result
|
||||
add pos, 32 ;Loop on 32 bytes at a time
|
||||
cmp pos, len
|
||||
; signed comparison: continue while pos < len
jl loop32
|
||||
|
||||
|
||||
; Normal exit: rax becomes pos - len (0 when len is a multiple of 32).
return_pass:
|
||||
sub pos, len
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
; Error exit returning 1. No branch in this routine targets this label.
return_fail:
|
||||
mov return, 1
|
||||
FUNC_RESTORE
|
||||
ret
|
||||
|
||||
; Closes the proc_frame that func() opens in the win64 build.
endproc_frame
|
||||
|
||||
section .data
|
||||
|
||||
; 16 bytes of 0x0f: selects the low nibble of every byte. Kept 16-byte
; aligned because gf_vect_mul_sse loads it with movdqa.
align 16
|
||||
mask0f:
|
||||
ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||
|
||||
; slversion name, a, b, c
; Exports two global labels at the same address, <name>_slver and
; <name>_slver_<a><b><c>, followed by a 16-bit word 0x<c> and the bytes
; 0x<b>, 0x<a>.
%macro slversion 4
|
||||
global %1_slver_%2%3%4
|
||||
global %1_slver
|
||||
%1_slver:
|
||||
%1_slver_%2%3%4:
|
||||
dw 0x%4
|
||||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_vect_mul_sse, 00, 02, 0034
|
142
erasure/src/gf-vect-mul-test.c
Normal file
142
erasure/src/gf-vect-mul-test.c
Normal file
@ -0,0 +1,142 @@
|
||||
/**********************************************************************
|
||||
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h> // for memset
|
||||
#include "erasure-code.h"
|
||||
|
||||
#define TEST_SIZE 8192
|
||||
#define TEST_MEM TEST_SIZE
|
||||
#define TEST_LOOPS 100000
|
||||
#define TEST_TYPE_STR ""
|
||||
|
||||
typedef unsigned char u8;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int i;
|
||||
u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
|
||||
int align, size;
|
||||
unsigned char *efence_buff1;
|
||||
unsigned char *efence_buff2;
|
||||
unsigned char *efence_buff3;
|
||||
|
||||
printf("gf_vect_mul_test:\n");
|
||||
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
|
||||
buff1 = (u8 *) malloc(TEST_SIZE);
|
||||
buff2 = (u8 *) malloc(TEST_SIZE);
|
||||
buff3 = (u8 *) malloc(TEST_SIZE);
|
||||
|
||||
if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
|
||||
printf("buffer alloc error\n");
|
||||
return -1;
|
||||
}
|
||||
// Fill with rand data
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
buff1[i] = rand();
|
||||
|
||||
gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2);
|
||||
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
|
||||
gf_mul(2, buff1[i]));
|
||||
return 1;
|
||||
}
|
||||
|
||||
gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3);
|
||||
|
||||
// Check reference function
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
if (buff2[i] != buff3[i]) {
|
||||
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
|
||||
i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < TEST_SIZE; i++)
|
||||
buff1[i] = rand();
|
||||
|
||||
// Check each possible constant
|
||||
printf("Random tests ");
|
||||
for (a = 0; a != 255; a++) {
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2);
|
||||
|
||||
for (i = 0; i < TEST_SIZE; i++) {
|
||||
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
|
||||
i, a, buff1[i], buff2[i], gf_mul(2, buff1[i]));
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
// Run tests at end of buffer for Electric Fence
|
||||
align = 32;
|
||||
a = 2;
|
||||
|
||||
gf_vect_mul_init(a, gf_const_tbl);
|
||||
for (size = 0; size < TEST_SIZE; size += align) {
|
||||
// Line up TEST_SIZE from end
|
||||
efence_buff1 = buff1 + size;
|
||||
efence_buff2 = buff2 + size;
|
||||
efence_buff3 = buff3 + size;
|
||||
|
||||
gf_vect_mul(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
|
||||
|
||||
for (i = 0; i < TEST_SIZE - size; i++)
|
||||
if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
|
||||
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
|
||||
i, efence_buff1[i], efence_buff2[i],
|
||||
gf_mul(2, efence_buff1[i]));
|
||||
return 1;
|
||||
}
|
||||
|
||||
gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3);
|
||||
|
||||
// Check reference function
|
||||
for (i = 0; i < TEST_SIZE - size; i++)
|
||||
if (efence_buff2[i] != efence_buff3[i]) {
|
||||
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
|
||||
i, a, efence_buff2[i], efence_buff3[i],
|
||||
gf_mul(2, efence_buff1[i]));
|
||||
return 1;
|
||||
}
|
||||
|
||||
putchar('.');
|
||||
}
|
||||
|
||||
printf(" done: Pass\n");
|
||||
return 0;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user