mirror of
https://github.com/minio/minio.git
synced 2024-12-24 22:25:54 -05:00
Initial commit
This commit is contained in:
commit
397b887a87
202
LICENSE
Normal file
202
LICENSE
Normal file
@ -0,0 +1,202 @@
|
|||||||
|
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
11
NOTICE
Normal file
11
NOTICE
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
Mini Object Storage
|
||||||
|
Copyright 2014 Minios, Inc.
|
||||||
|
|
||||||
|
This product includes software developed at Minios, Inc.
|
||||||
|
(http://minios.io/).
|
||||||
|
|
||||||
|
The Minios project contains unmodified subcomponents under the contrib
|
||||||
|
folder with separate copyright notices and license terms. Your use of
|
||||||
|
the source code for these subcomponents is subject to the terms
|
||||||
|
and conditions of the following licenses.
|
||||||
|
|
51
docs/git/workflow.md
Normal file
51
docs/git/workflow.md
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
Git Workflow
|
||||||
|
============
|
||||||
|
|
||||||
|
Update local repo with latest changes from upstream
|
||||||
|
```sh
|
||||||
|
git fetch
|
||||||
|
```
|
||||||
|
|
||||||
|
Create a new branch from the latest code
|
||||||
|
```sh
|
||||||
|
git checkout origin/master
|
||||||
|
git checkout -b new_feature_branch
|
||||||
|
```
|
||||||
|
|
||||||
|
```sh
|
||||||
|
# do work here
|
||||||
|
```
|
||||||
|
|
||||||
|
Create commit for submission
|
||||||
|
```sh
|
||||||
|
git commit -m "My Commit Message"
|
||||||
|
```
|
||||||
|
|
||||||
|
Prepare commit for inclusion
|
||||||
|
```sh
|
||||||
|
git fetch
|
||||||
|
git rebase origin/master
|
||||||
|
```
|
||||||
|
|
||||||
|
Assuming no conflict, push to your personal fork.
|
||||||
|
|
||||||
|
```sh
|
||||||
|
git push myrepo new_feature_branch:new_feature_branch
|
||||||
|
# Visit https://github.com/minios/minios and create a new pull request
|
||||||
|
# from your branch.
|
||||||
|
```
|
||||||
|
|
||||||
|
Useful Tools
|
||||||
|
------------
|
||||||
|
As an alternative to manually pushing and creating a pull request, use the `pulls send` command from github.com/docker/gordon:
|
||||||
|
|
||||||
|
Create a new pull request.
|
||||||
|
```sh
|
||||||
|
pulls send
|
||||||
|
# automatically performs git push and creates pull request
|
||||||
|
```
|
||||||
|
|
||||||
|
Update an existing pull request (e.g. PR 42)
|
||||||
|
```sh
|
||||||
|
pulls send 42
|
||||||
|
```
|
4
docs/internal/DESIGN
Normal file
4
docs/internal/DESIGN
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
- Erasure code - http://bit.ly/1yqxkUt (intel isal library)
|
||||||
|
- Future erasure codes to look at - Fountain Codes, Simple XOR techniques, Custom
|
||||||
|
- Bootstrap techniques - no downtime scenario
|
||||||
|
- Simple APIs
|
9
docs/internal/INFRA
Normal file
9
docs/internal/INFRA
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
- Google groups development (minios-dev)
|
||||||
|
- Github groups (https://github.com/minios)
|
||||||
|
- IRC users #minios
|
||||||
|
- Support (zendesk.com)
|
||||||
|
- Community Q/A (stackoverflow.com)
|
||||||
|
- https://github.com/justinwalsh/daux.io (Document generator) or 'metalsmith.io'
|
||||||
|
- Continuous integration - (https://drone.io/, https://codeship.io,
|
||||||
|
http://wercker.com/, https://coveralls.io/)
|
||||||
|
- Web UI - polymer project
|
10
docs/metalsmith.json
Normal file
10
docs/metalsmith.json
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"source": "src",
|
||||||
|
"destination": "build",
|
||||||
|
"plugins": {
|
||||||
|
"metalsmith-drafts": true,
|
||||||
|
"metalsmith-markdown": true,
|
||||||
|
"metalsmith-permalinks": "posts/:title",
|
||||||
|
"metalsmith-templates": "handlebars"
|
||||||
|
}
|
||||||
|
}
|
3
erasure/.gitignore
vendored
Normal file
3
erasure/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
*.o
|
||||||
|
*.a
|
||||||
|
*.so
|
202
erasure/LICENSE
Normal file
202
erasure/LICENSE
Normal file
@ -0,0 +1,202 @@
|
|||||||
|
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
26
erasure/LICENSE.INTEL
Normal file
26
erasure/LICENSE.INTEL
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
42
erasure/Makefile
Normal file
42
erasure/Makefile
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
########################################################################
|
||||||
|
# Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of Intel Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
|
||||||
|
units = src
|
||||||
|
|
||||||
|
default: slib
|
||||||
|
|
||||||
|
include $(foreach unit,$(units), $(unit)/Makefile)
|
||||||
|
|
||||||
|
# Override individual lib names to make one inclusive library.
|
||||||
|
lib_name := isa-l.a
|
||||||
|
|
||||||
|
include make.inc
|
||||||
|
|
||||||
|
VPATH = $(units) include
|
78
erasure/Makefile.nmake
Normal file
78
erasure/Makefile.nmake
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
########################################################################
|
||||||
|
# Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of Intel Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
objs = src\ec-base.obj src\ec-highlevel-func.obj src\ec-multisrcary.obj src\gf-2vect-dot-prod-avx.obj src\gf-2vect-dot-prod-avx2.obj src\gf-2vect-dot-prod-sse.obj src\gf-3vect-dot-prod-avx.obj src\gf-3vect-dot-prod-avx2.obj src\gf-3vect-dot-prod-sse.obj src\gf-4vect-dot-prod-avx.obj src\gf-4vect-dot-prod-avx2.obj src\gf-4vect-dot-prod-sse.obj src\gf-5vect-dot-prod-avx.obj src\gf-5vect-dot-prod-avx2.obj src\gf-5vect-dot-prod-sse.obj src\gf-6vect-dot-prod-avx.obj src\gf-6vect-dot-prod-avx2.obj src\gf-6vect-dot-prod-sse.obj src\gf-vect-dot-prod-avx.obj src\gf-vect-dot-prod-avx2.obj src\gf-vect-dot-prod-sse.obj src\gf-vect-mul-avx.obj src\gf-vect-mul-sse.obj
|
||||||
|
|
||||||
|
libpath = c:\openssl\lib #set to ossl path for tests
|
||||||
|
lisrcc = c:\openssl\include
|
||||||
|
zlibpath = c:\zlib\lib
|
||||||
|
zlisrcc = c:\zlib\include
|
||||||
|
INCLUDES = -Isrc -Iinclude -I$(lisrcc) -I$(zlisrcc)
|
||||||
|
CFLAGS = -O2 -D ZLIB-WINAPI -D NDEBUG /nologo -D-USE-MATH-DEFINES -Qstd=c99 $(INCLUDES) $(D)
|
||||||
|
AFLAGS = -f win64 $(INCLUDES) $(D)
|
||||||
|
CC = icl
|
||||||
|
AS = yasm
|
||||||
|
|
||||||
|
lib: src isa-l.lib
|
||||||
|
|
||||||
|
src: ; -mkdir $@
|
||||||
|
|
||||||
|
isa-l.lib: $(objs)
|
||||||
|
lib -out:$@ $?
|
||||||
|
|
||||||
|
{erasure-code}.c.obj:
|
||||||
|
$(CC) $(CFLAGS) /c -Fo$@ $?
|
||||||
|
{erasure-code}.asm.obj:
|
||||||
|
$(AS) $(AFLAGS) -o $@ $?
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
.obj.exe:
|
||||||
|
link /out:$@ /nologo /libpath:$(libpath) /libpath:$(zlibpath) isa-l.lib $?
|
||||||
|
|
||||||
|
# Unit tests
|
||||||
|
tests = erasure-code-base-test.exe erasure-code-sse-test.exe erasure-code-test.exe gf-2vect-dot-prod-sse-test.exe gf-3vect-dot-prod-sse-test.exe gf-4vect-dot-prod-sse-test.exe gf-5vect-dot-prod-sse-test.exe gf-6vect-dot-prod-sse-test.exe gf-inverse-test.exe gf-vect-dot-prod-avx-test.exe gf-vect-dot-prod-base-test.exe gf-vect-dot-prod-sse-test.exe gf-vect-dot-prod-test.exe gf-vect-mul-avx-test.exe gf-vect-mul-base-test.exe gf-vect-mul-sse-test.exe gf-vect-mul-test.exe
|
||||||
|
|
||||||
|
tests: lib $(tests)
|
||||||
|
$(tests): $(@B).obj
|
||||||
|
|
||||||
|
# Performance tests
|
||||||
|
perfs = erasure-code-base-perf.exe erasure-code-perf.exe erasure-code-sse-perf.exe gf-2vect-dot-prod-sse-perf.exe gf-3vect-dot-prod-sse-perf.exe gf-4vect-dot-prod-sse-perf.exe gf-5vect-dot-prod-sse-perf.exe gf-6vect-dot-prod-sse-perf.exe gf-vect-dot-prod-1tbl.exe gf-vect-dot-prod-avx-perf.exe gf-vect-dot-prod-perf.exe gf-vect-dot-prod-sse-perf.exe gf-vect-mul-avx-perf.exe gf-vect-mul-perf.exe gf-vect-mul-sse-perf.exe
|
||||||
|
|
||||||
|
perfs: lib $(perfs)
|
||||||
|
$(perfs): $(@B).obj
|
||||||
|
|
||||||
|
clean:
|
||||||
|
-if exist *.obj del *.obj
|
||||||
|
-if exist src\*.obj del src\*.obj
|
||||||
|
-if exist *.exe del *.exe
|
||||||
|
-if exist isa-l.lib del isa-l.lib
|
||||||
|
|
||||||
|
zlib.lib:
|
||||||
|
libeay32.lib:
|
11
erasure/README.md
Normal file
11
erasure/README.md
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
- Install 'build-essential'
|
||||||
|
|
||||||
|
~~~
|
||||||
|
# apt-get install build-essential -y
|
||||||
|
~~~
|
||||||
|
|
||||||
|
- Install 'yasm'
|
||||||
|
|
||||||
|
~~~
|
||||||
|
# apt-get install yasm -y
|
||||||
|
~~~
|
49
erasure/RELEASE-NOTES.INTEL
Normal file
49
erasure/RELEASE-NOTES.INTEL
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
================================================================================
|
||||||
|
v2.10 Intel Intelligent Storage Acceleration Library Release Notes
|
||||||
|
Open Source Version
|
||||||
|
================================================================================
|
||||||
|
|
||||||
|
================================================================================
|
||||||
|
RELEASE NOTE CONTENTS
|
||||||
|
================================================================================
|
||||||
|
1. KNOWN ISSUES
|
||||||
|
2. FIXED ISSUES
|
||||||
|
3. CHANGE LOG & FEATURES ADDED
|
||||||
|
|
||||||
|
================================================================================
|
||||||
|
1. KNOWN ISSUES
|
||||||
|
================================================================================
|
||||||
|
|
||||||
|
* Only erasure code unit included in open source version at this time.
|
||||||
|
|
||||||
|
* Perf tests do not run in Windows environment.
|
||||||
|
|
||||||
|
* Leaving <unit>/bin directories from builds in unit directories will cause the
|
||||||
|
top-level make build to fail. Build only in top-level or ensure unit
|
||||||
|
directories are clean of objects and /bin.
|
||||||
|
|
||||||
|
* 32-bit lib is not supported in Windows.
|
||||||
|
|
||||||
|
================================================================================
|
||||||
|
2. FIXED ISSUES
|
||||||
|
================================================================================
|
||||||
|
v2.10
|
||||||
|
|
||||||
|
* Fix for Windows register save overlap in gf_{3-6}vect_dot_prod_sse.asm. Only
|
||||||
|
affects Windows versions of erasure code. GP register saves/restores were
|
||||||
|
pushed to same stack area as XMM.
|
||||||
|
|
||||||
|
================================================================================
|
||||||
|
3. CHANGE LOG & FEATURES ADDED
|
||||||
|
================================================================================
|
||||||
|
v2.10
|
||||||
|
|
||||||
|
* Erasure code updates
|
||||||
|
- New AVX and AVX2 support functions.
|
||||||
|
- Changes min len requirement on gf_vect_dot_prod() to 32 from 16.
|
||||||
|
- Tests include both source and parity recovery with ec_encode_data().
|
||||||
|
- New encoding examples with Vandermonde or Cauchy matrix.
|
||||||
|
|
||||||
|
v2.8
|
||||||
|
|
||||||
|
* First open release of erasure code unit that is part of ISA-L.
|
5429
erasure/docs/isa-l_open_src_2.10.pdf
Normal file
5429
erasure/docs/isa-l_open_src_2.10.pdf
Normal file
File diff suppressed because it is too large
Load Diff
6680
erasure/include/ec-base.h
Normal file
6680
erasure/include/ec-base.h
Normal file
File diff suppressed because it is too large
Load Diff
659
erasure/include/erasure-code.h
Normal file
659
erasure/include/erasure-code.h
Normal file
@ -0,0 +1,659 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef _ERASURE_CODE_H_
|
||||||
|
#define _ERASURE_CODE_H_
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @file erasure-code.h
|
||||||
|
* @brief Interface to functions supporting erasure code encode and decode.
|
||||||
|
*
|
||||||
|
* This file defines the interface to optimized functions used in erasure
|
||||||
|
* codes. Encode and decode of erasures in GF(2^8) are made by calculating the
|
||||||
|
* dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a
|
||||||
|
* set of coefficients. Values for the coefficients are determined by the type
|
||||||
|
* of erasure code. Using a general dot product means that any sequence of
|
||||||
|
* coefficients may be used including erasure codes based on random
|
||||||
|
* coefficients.
|
||||||
|
* Multiple versions of dot product are supplied to calculate 1-6 output
|
||||||
|
* vectors in one pass.
|
||||||
|
* Base GF multiply and divide functions can be sped up by defining
|
||||||
|
* GF_LARGE_TABLES at the expense of memory size.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "gf-vect-mul.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Initialize tables for fast Erasure Code encode and decode.
|
||||||
|
*
|
||||||
|
* Generates the expanded tables needed for fast encode or decode for erasure
|
||||||
|
* codes on blocks of data. 32 bytes are generated for each input coefficient.
|
||||||
|
*
|
||||||
|
* @param k The number of vector sources or rows in the generator matrix
|
||||||
|
* for coding.
|
||||||
|
* @param rows The number of output vectors to concurrently encode/decode.
|
||||||
|
* @param a Pointer to sets of arrays of input coefficients used to encode
|
||||||
|
* or decode data.
|
||||||
|
* @param gftbls Pointer to start of space for concatenated output tables
|
||||||
|
* generated from input coefficients. Must be of size 32*k*rows.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Generate or decode erasure codes on blocks of data.
|
||||||
|
*
|
||||||
|
* Given a list of source data blocks, generate one or multiple blocks of
|
||||||
|
* encoded data as specified by a matrix of GF(2^8) coefficients. When given a
|
||||||
|
* suitable set of coefficients, this function will perform the fast generation
|
||||||
|
* or decoding of Reed-Solomon type erasure codes.
|
||||||
|
*
|
||||||
|
* @requires SSE4.1
|
||||||
|
* @param len Length of each block of data (vector) of source or dest data.
|
||||||
|
* @param k The number of vector sources or rows in the generator matrix
|
||||||
|
* for coding.
|
||||||
|
* @param rows The number of output vectors to concurrently encode/decode.
|
||||||
|
* @param gftbls Pointer to array of input tables generated from coding
|
||||||
|
* coefficients in ec_init_tables(). Must be of size 32*k*rows
|
||||||
|
* @param data Array of pointers to source input buffers.
|
||||||
|
* @param coding Array of pointers to coded output buffers.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, unsigned char **coding);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Generate or decode erasure codes on blocks of data, runs appropriate version.
|
||||||
|
*
|
||||||
|
* Given a list of source data blocks, generate one or multiple blocks of
|
||||||
|
* encoded data as specified by a matrix of GF(2^8) coefficients. When given a
|
||||||
|
* suitable set of coefficients, this function will perform the fast generation
|
||||||
|
* or decoding of Reed-Solomon type erasure codes.
|
||||||
|
*
|
||||||
|
* This function determines what instruction sets are enabled and
|
||||||
|
* selects the appropriate version at runtime.
|
||||||
|
*
|
||||||
|
* @param len Length of each block of data (vector) of source or dest data.
|
||||||
|
* @param k The number of vector sources or rows in the generator matrix
|
||||||
|
* for coding.
|
||||||
|
* @param rows The number of output vectors to concurrently encode/decode.
|
||||||
|
* @param gftbls Pointer to array of input tables generated from coding
|
||||||
|
* coefficients in ec_init_tables(). Must be of size 32*k*rows
|
||||||
|
* @param data Array of pointers to source input buffers.
|
||||||
|
* @param coding Array of pointers to coded output buffers.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, unsigned char **coding);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Generate or decode erasure codes on blocks of data, runs baseline version.
|
||||||
|
*
|
||||||
|
* Given a list of source data blocks, generate one or multiple blocks of
|
||||||
|
* encoded data as specified by a matrix of GF(2^8) coefficients. When given a
|
||||||
|
* suitable set of coefficients, this function will perform the fast generation
|
||||||
|
* or decoding of Reed-Solomon type erasure codes.
|
||||||
|
*
|
||||||
|
* @param len Length of each block of data (vector) of source or dest data.
|
||||||
|
* @param srcs The number of vector sources or rows in the generator matrix
|
||||||
|
* for coding.
|
||||||
|
* @param dests The number of output vectors to concurrently encode/decode.
|
||||||
|
* @param v Pointer to array of input tables generated from coding
|
||||||
|
* coefficients in ec_init_tables(). Must be of size 32*srcs*dests
|
||||||
|
* @param src Array of pointers to source input buffers.
|
||||||
|
* @param dest Array of pointers to coded output buffers.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src, unsigned char **dest);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product.
|
||||||
|
*
|
||||||
|
* Does a GF(2^8) dot product across each byte of the input array and a constant
|
||||||
|
* set of coefficients to produce each byte of the output. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 32*vlen byte constant array based on the input coefficients.
|
||||||
|
*
|
||||||
|
* @requires SSE4.1
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 16.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
|
||||||
|
* on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Pointer to destination data array.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char *dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product.
|
||||||
|
*
|
||||||
|
* Does a GF(2^8) dot product across each byte of the input array and a constant
|
||||||
|
* set of coefficients to produce each byte of the output. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 32*vlen byte constant array based on the input coefficients.
|
||||||
|
*
|
||||||
|
* @requires AVX
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 16.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
|
||||||
|
* on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Pointer to destination data array.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char *dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product.
|
||||||
|
*
|
||||||
|
* Does a GF(2^8) dot product across each byte of the input array and a constant
|
||||||
|
* set of coefficients to produce each byte of the output. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 32*vlen byte constant array based on the input coefficients.
|
||||||
|
*
|
||||||
|
* @requires AVX2
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 32.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
|
||||||
|
* on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Pointer to destination data array.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char *dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product with two outputs.
|
||||||
|
*
|
||||||
|
* Vector dot product optimized to calculate two outputs at a time. Does two
|
||||||
|
* GF(2^8) dot products across each byte of the input array and two constant
|
||||||
|
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 2*32*vlen byte constant array based on the two sets of input coefficients.
|
||||||
|
*
|
||||||
|
* @requires SSE4.1
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 16.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
|
||||||
|
* based on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Array of pointers to destination data buffers.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char **dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product with two outputs.
|
||||||
|
*
|
||||||
|
* Vector dot product optimized to calculate two outputs at a time. Does two
|
||||||
|
* GF(2^8) dot products across each byte of the input array and two constant
|
||||||
|
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 2*32*vlen byte constant array based on the two sets of input coefficients.
|
||||||
|
*
|
||||||
|
* @requires AVX
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 16.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
|
||||||
|
* based on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Array of pointers to destination data buffers.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char **dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product with two outputs.
|
||||||
|
*
|
||||||
|
* Vector dot product optimized to calculate two outputs at a time. Does two
|
||||||
|
* GF(2^8) dot products across each byte of the input array and two constant
|
||||||
|
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 2*32*vlen byte constant array based on the two sets of input coefficients.
|
||||||
|
*
|
||||||
|
* @requires AVX2
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 32.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
|
||||||
|
* based on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Array of pointers to destination data buffers.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char **dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product with three outputs.
|
||||||
|
*
|
||||||
|
* Vector dot product optimized to calculate three outputs at a time. Does three
|
||||||
|
* GF(2^8) dot products across each byte of the input array and three constant
|
||||||
|
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 3*32*vlen byte constant array based on the three sets of input coefficients.
|
||||||
|
*
|
||||||
|
* @requires SSE4.1
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 16.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
|
||||||
|
* based on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Array of pointers to destination data buffers.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char **dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product with three outputs.
|
||||||
|
*
|
||||||
|
* Vector dot product optimized to calculate three outputs at a time. Does three
|
||||||
|
* GF(2^8) dot products across each byte of the input array and three constant
|
||||||
|
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 3*32*vlen byte constant array based on the three sets of input coefficients.
|
||||||
|
*
|
||||||
|
* @requires AVX
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 16.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
|
||||||
|
* based on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Array of pointers to destination data buffers.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char **dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product with three outputs.
|
||||||
|
*
|
||||||
|
* Vector dot product optimized to calculate three outputs at a time. Does three
|
||||||
|
* GF(2^8) dot products across each byte of the input array and three constant
|
||||||
|
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 3*32*vlen byte constant array based on the three sets of input coefficients.
|
||||||
|
*
|
||||||
|
* @requires AVX2
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 32.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
|
||||||
|
* based on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Array of pointers to destination data buffers.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char **dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product with four outputs.
|
||||||
|
*
|
||||||
|
* Vector dot product optimized to calculate four outputs at a time. Does four
|
||||||
|
* GF(2^8) dot products across each byte of the input array and four constant
|
||||||
|
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 4*32*vlen byte constant array based on the four sets of input coefficients.
|
||||||
|
*
|
||||||
|
* @requires SSE4.1
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 16.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
|
||||||
|
* based on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Array of pointers to destination data buffers.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char **dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product with four outputs.
|
||||||
|
*
|
||||||
|
* Vector dot product optimized to calculate four outputs at a time. Does four
|
||||||
|
* GF(2^8) dot products across each byte of the input array and four constant
|
||||||
|
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 4*32*vlen byte constant array based on the four sets of input coefficients.
|
||||||
|
*
|
||||||
|
* @requires AVX
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 16.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
|
||||||
|
* based on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Array of pointers to destination data buffers.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char **dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product with four outputs.
|
||||||
|
*
|
||||||
|
* Vector dot product optimized to calculate four outputs at a time. Does four
|
||||||
|
* GF(2^8) dot products across each byte of the input array and four constant
|
||||||
|
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 4*32*vlen byte constant array based on the four sets of input coefficients.
|
||||||
|
*
|
||||||
|
* @requires AVX2
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 32.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
|
||||||
|
* based on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Array of pointers to destination data buffers.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char **dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product with five outputs.
|
||||||
|
*
|
||||||
|
* Vector dot product optimized to calculate five outputs at a time. Does five
|
||||||
|
* GF(2^8) dot products across each byte of the input array and five constant
|
||||||
|
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 5*32*vlen byte constant array based on the five sets of input coefficients.
|
||||||
|
*
|
||||||
|
* @requires SSE4.1
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 16.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
|
||||||
|
* based on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Array of pointers to destination data buffers.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char **dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product with five outputs.
|
||||||
|
*
|
||||||
|
* Vector dot product optimized to calculate five outputs at a time. Does five
|
||||||
|
* GF(2^8) dot products across each byte of the input array and five constant
|
||||||
|
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 5*32*vlen byte constant array based on the five sets of input coefficients.
|
||||||
|
*
|
||||||
|
* @requires AVX
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 16.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
|
||||||
|
* based on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Array of pointers to destination data buffers.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char **dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product with five outputs.
|
||||||
|
*
|
||||||
|
* Vector dot product optimized to calculate five outputs at a time. Does five
|
||||||
|
* GF(2^8) dot products across each byte of the input array and five constant
|
||||||
|
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 5*32*vlen byte constant array based on the five sets of input coefficients.
|
||||||
|
*
|
||||||
|
* @requires AVX2
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 32.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
|
||||||
|
* based on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Array of pointers to destination data buffers.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char **dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product with six outputs.
|
||||||
|
*
|
||||||
|
* Vector dot product optimized to calculate six outputs at a time. Does six
|
||||||
|
* GF(2^8) dot products across each byte of the input array and six constant
|
||||||
|
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 6*32*vlen byte constant array based on the six sets of input coefficients.
|
||||||
|
*
|
||||||
|
* @requires SSE4.1
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 16.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
|
||||||
|
* based on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Array of pointers to destination data buffers.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char **dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product with six outputs.
|
||||||
|
*
|
||||||
|
* Vector dot product optimized to calculate six outputs at a time. Does six
|
||||||
|
* GF(2^8) dot products across each byte of the input array and six constant
|
||||||
|
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 6*32*vlen byte constant array based on the six sets of input coefficients.
|
||||||
|
*
|
||||||
|
* @requires AVX
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 16.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
|
||||||
|
* based on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Array of pointers to destination data buffers.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char **dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product with six outputs.
|
||||||
|
*
|
||||||
|
* Vector dot product optimized to calculate six outputs at a time. Does six
|
||||||
|
* GF(2^8) dot products across each byte of the input array and six constant
|
||||||
|
* sets of coefficients to produce each byte of the outputs. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 6*32*vlen byte constant array based on the six sets of input coefficients.
|
||||||
|
*
|
||||||
|
* @requires AVX2
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 32.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
|
||||||
|
* based on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Array of pointers to destination data buffers.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char **dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product, runs baseline version.
|
||||||
|
*
|
||||||
|
* Does a GF(2^8) dot product across each byte of the input array and a constant
|
||||||
|
* set of coefficients to produce each byte of the output. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 32*vlen byte constant array based on the input coefficients.
|
||||||
|
*
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 16.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
|
||||||
|
* on the array of input coefficients. Only elements 32*CONST*j + 1
|
||||||
|
* of this array are used, where j = (0, 1, 2...) and CONST is the
|
||||||
|
* number of elements in the array of input coefficients. The
|
||||||
|
* elements used correspond to the original input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Pointer to destination data array.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char *dest);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector dot product, runs appropriate version.
|
||||||
|
*
|
||||||
|
* Does a GF(2^8) dot product across each byte of the input array and a constant
|
||||||
|
* set of coefficients to produce each byte of the output. Can be used for
|
||||||
|
* erasure coding encode and decode. Function requires pre-calculation of a
|
||||||
|
* 32*vlen byte constant array based on the input coefficients.
|
||||||
|
*
|
||||||
|
* This function determines what instruction sets are enabled and
|
||||||
|
* selects the appropriate version at runtime.
|
||||||
|
*
|
||||||
|
* @param len Length of each vector in bytes. Must be >= 32.
|
||||||
|
* @param vlen Number of vector sources.
|
||||||
|
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
|
||||||
|
* on the array of input coefficients.
|
||||||
|
* @param src Array of pointers to source inputs.
|
||||||
|
* @param dest Pointer to destination data array.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls,
|
||||||
|
unsigned char **src, unsigned char *dest);
|
||||||
|
|
||||||
|
/**********************************************************************
|
||||||
|
* The remaining are lib support functions used in GF(2^8) operations.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Single element GF(2^8) multiply.
|
||||||
|
*
|
||||||
|
* @param a Multiplicand a
|
||||||
|
* @param b Multiplicand b
|
||||||
|
* @returns Product of a and b in GF(2^8)
|
||||||
|
*/
|
||||||
|
|
||||||
|
unsigned char gf_mul(unsigned char a, unsigned char b);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Single element GF(2^8) inverse.
|
||||||
|
*
|
||||||
|
* @param a Input element
|
||||||
|
* @returns Field element b such that a x b = {1}
|
||||||
|
*/
|
||||||
|
|
||||||
|
unsigned char gf_inv(unsigned char a);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Generate a matrix of coefficients to be used for encoding.
|
||||||
|
*
|
||||||
|
* Vandermonde matrix example of encoding coefficients where high portion of
|
||||||
|
* matrix is identity matrix I and lower portion is constructed as 2^{i*(j-k+1)}
|
||||||
|
* i:{0,k-1} j:{k,m-1}. Commonly used method for choosing coefficients in
|
||||||
|
* erasure encoding but does not guarantee invertibility for every sub-matrix. For
|
||||||
|
* large k it is possible to find cases where the decode matrix chosen from
|
||||||
|
* sources and parity not in erasure is not invertible. Users may want to
|
||||||
|
* adjust for k > 5.
|
||||||
|
*
|
||||||
|
* @param a [mxk] array to hold coefficients
|
||||||
|
* @param m number of rows in matrix corresponding to srcs + parity.
|
||||||
|
* @param k number of columns in matrix corresponding to srcs.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_gen_rs_matrix(unsigned char *a, int m, int k);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Generate a Cauchy matrix of coefficients to be used for encoding.
|
||||||
|
*
|
||||||
|
* Cauchy matrix example of encoding coefficients where high portion of matrix
|
||||||
|
* is identity matrix I and lower portion is constructed as 1/(i + j) | i != j,
|
||||||
|
* i:{0,k-1} j:{k,m-1}. Any sub-matrix of a Cauchy matrix should be invertible.
|
||||||
|
*
|
||||||
|
* @param a [mxk] array to hold coefficients
|
||||||
|
* @param m number of rows in matrix corresponding to srcs + parity.
|
||||||
|
* @param k number of columns in matrix corresponding to srcs.
|
||||||
|
* @returns none
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Invert a matrix in GF(2^8)
|
||||||
|
*
|
||||||
|
* @param in input matrix
|
||||||
|
* @param out output matrix such that [in] x [out] = [I] - identity matrix
|
||||||
|
* @param n size of matrix [nxn]
|
||||||
|
* @returns 0 successful, other fail on singular input matrix
|
||||||
|
*/
|
||||||
|
|
||||||
|
int gf_invert_matrix(unsigned char *in, unsigned char *out, const int n);
|
||||||
|
|
||||||
|
/*************************************************************/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif //_ERASURE_CODE_H_
|
81
erasure/include/erasure/tests.h
Normal file
81
erasure/include/erasure/tests.h
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef __ERASURE_TESTS_H
|
||||||
|
#define __ERASURE_TESTS_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Use sys/time.h functions for time
|
||||||
|
|
||||||
|
#include <sys/time.h>
|
||||||
|
|
||||||
|
/**
 * @brief Timestamp captured by the perf_* timing helpers.
 *
 * Thin wrapper around the struct timeval filled in by gettimeofday().
 */
struct perf {
	struct timeval tv;
};

/*
 * The helpers below are declared `static inline`, not plain `inline`.
 * Under C99/C11 rules a plain `inline` definition in a header provides no
 * external definition, so any call the compiler chooses not to inline
 * (for example at -O0) ends up as an unresolved symbol at link time.
 *
 * NOTE(review): perf_print() uses printf, but only <sys/time.h> is included
 * in the visible part of this header - confirm that every includer pulls in
 * <stdio.h> first.
 */

/**
 * @brief Record the current time as the start of a measurement.
 *
 * @param p  Timestamp to fill in.
 * @returns  The return value of gettimeofday().
 */
static inline int perf_start(struct perf *p)
{
	return gettimeofday(&(p->tv), 0);
}

/**
 * @brief Record the current time as the end of a measurement.
 *
 * Identical to perf_start(); the separate name only documents intent at the
 * call site.
 *
 * @param p  Timestamp to fill in.
 * @returns  The return value of gettimeofday().
 */
static inline int perf_stop(struct perf *p)
{
	return gettimeofday(&(p->tv), 0);
}

/**
 * @brief Print the elapsed time between two timestamps and, optionally, the
 *        resulting bandwidth.
 *
 * Always prints "runtime = <usecs> usecs". When @p dsize is non-zero it also
 * prints the data size divided by 1024*1024, the elapsed time in seconds and
 * dsize / usecs (bytes per microsecond) labelled as MB/s.
 *
 * @param stop   Timestamp taken at the end of the measured region.
 * @param start  Timestamp taken at the start of the measured region.
 * @param dsize  Number of bytes processed, or 0 to print the runtime only.
 * @returns none
 */
static inline void perf_print(struct perf stop, struct perf start, long long dsize)
{
	long long secs = stop.tv.tv_sec - start.tv.tv_sec;
	long long usecs = secs * 1000000 + stop.tv.tv_usec - start.tv.tv_usec;

	printf("runtime = %10lld usecs", usecs);
	if (dsize != 0) {
#if 1 // single printf call; the #else branch is presumably a fallback for a 32-bit printf problem
		printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s\n", dsize/(1024*1024),
		       ((double) usecs)/1000000, ((double) dsize) / (double)usecs);
#else
		printf(", bandwidth %lld MB ", dsize/(1024*1024));
		printf("in %.4f sec ",(double)usecs/1000000);
		printf("= %.2f MB/s\n", (double)dsize/usecs);
#endif
	} else {
		printf("\n");
	}
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // __ERASURE_TESTS_H
|
80
erasure/include/erasure/types.h
Normal file
80
erasure/include/erasure/types.h
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @file types.h
|
||||||
|
* @brief Defines standard width types.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __ERASURE_TYPES_H
|
||||||
|
#define __ERASURE_TYPES_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Fixed-width integer aliases used throughout the erasure code.
 *
 * The first branch uses the MSVC/MinGW sized-integer keywords. The second
 * branch is for Unix-like compilers; it is also selected for macOS, whose
 * toolchains define __APPLE__ but not __unix__ and do not provide __int64.
 *
 * The Unix-side typedefs rely on `long` being 64 bits wide (LP64); on an
 * ILP32 target UINT64/INT64 would only be 32 bits.
 */
#if !defined(__unix__) && !defined(__APPLE__)
#ifdef __MINGW32__
# include <_mingw.h>
#endif
typedef unsigned __int64 UINT64;
typedef __int64 INT64;
typedef unsigned __int32 UINT32;
typedef unsigned __int16 UINT16;
typedef unsigned char UINT8;
#else
typedef unsigned long int UINT64;
typedef long int INT64;
typedef unsigned int UINT32;
typedef unsigned short int UINT16;
typedef unsigned char UINT8;
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Compiler portability helpers.
 *
 * DECLARE_ALIGNED(decl, alignval)  declares `decl` with the given alignment.
 * __forceinline                    maps the MSVC keyword to `static inline`
 *                                  on Unix-like compilers.
 * posix_memalign(p, algn, len)     (non-Unix only) shim over _aligned_malloc;
 *                                  stores the allocation through `p` and
 *                                  evaluates to 0 on success, 1 on failure.
 *                                  Note the swapped argument order of
 *                                  _aligned_malloc(size, alignment).
 *
 * macOS compilers define __APPLE__ but not __unix__, so both are accepted
 * for the GCC-style branch; otherwise macOS would fall into the __declspec
 * branch, which its toolchain does not accept by default.
 *
 * The shim's arguments are parenthesised so that expressions such as
 * `n * size` expand safely. The result is cast to `char *` to match the
 * lvalue, so the header also compiles as C++.
 */
#if defined(__unix__) || defined(__APPLE__)
# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
# define __forceinline static inline
#else
# define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl
# define posix_memalign(p, algn, len) (NULL == (*((char **)(p)) = (char *) _aligned_malloc((len), (algn))))
#endif

/*
 * DEBUG_PRINT takes a fully parenthesised printf argument list, e.g.
 *     DEBUG_PRINT(("value = %d\n", v));
 * It expands to a printf call when DEBUG is defined and to an empty
 * statement otherwise.
 */
#ifdef DEBUG
# define DEBUG_PRINT(x) printf x
#else
# define DEBUG_PRINT(x) do {} while (0)
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif //__ERASURE_TYPES_H
|
148
erasure/include/gf-vect-mul.h
Normal file
148
erasure/include/gf-vect-mul.h
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef _GF_VECT_MUL_H
|
||||||
|
#define _GF_VECT_MUL_H
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @file gf-vect-mul.h
|
||||||
|
* @brief Interface to functions for vector (block) multiplication in GF(2^8).
|
||||||
|
*
|
||||||
|
* This file defines the interface to routines used in fast RAID rebuild and
|
||||||
|
* erasure codes.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector multiply by constant.
|
||||||
|
*
|
||||||
|
* Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
|
||||||
|
* is a single field element in GF(2^8). Can be used for RAID6 rebuild
|
||||||
|
* and partial write functions. Function requires pre-calculation of a
|
||||||
|
* 32-element constant array based on constant C. gftbl(C) = {C{00},
|
||||||
|
* C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. Len
|
||||||
|
* and src must be aligned to 32B.
|
||||||
|
|
||||||
|
* @requires SSE4.1
|
||||||
|
* @param len Length of vector in bytes. Must be aligned to 32B.
|
||||||
|
* @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
|
||||||
|
* @param src Pointer to src data array. Must be aligned to 32B.
|
||||||
|
* @param dest Pointer to destination data array. Must be aligned to 32B.
|
||||||
|
* @returns 0 pass, other fail
|
||||||
|
*/
|
||||||
|
|
||||||
|
int gf_vect_mul_sse(int len, unsigned char *gftbl, void *src, void *dest);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector multiply by constant.
|
||||||
|
*
|
||||||
|
* Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
|
||||||
|
* is a single field element in GF(2^8). Can be used for RAID6 rebuild
|
||||||
|
* and partial write functions. Function requires pre-calculation of a
|
||||||
|
* 32-element constant array based on constant C. gftbl(C) = {C{00},
|
||||||
|
* C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. Len
|
||||||
|
* and src must be aligned to 32B.
|
||||||
|
|
||||||
|
* @requires AVX
|
||||||
|
* @param len Length of vector in bytes. Must be aligned to 32B.
|
||||||
|
* @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
|
||||||
|
* @param src Pointer to src data array. Must be aligned to 32B.
|
||||||
|
* @param dest Pointer to destination data array. Must be aligned to 32B.
|
||||||
|
* @returns 0 pass, other fail
|
||||||
|
*/
|
||||||
|
|
||||||
|
int gf_vect_mul_avx(int len, unsigned char *gftbl, void *src, void *dest);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector multiply by constant, runs appropriate version.
|
||||||
|
*
|
||||||
|
* Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
|
||||||
|
* is a single field element in GF(2^8). Can be used for RAID6 rebuild
|
||||||
|
* and partial write functions. Function requires pre-calculation of a
|
||||||
|
* 32-element constant array based on constant C. gftbl(C) = {C{00},
|
||||||
|
* C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }.
|
||||||
|
* Len and src must be aligned to 32B.
|
||||||
|
*
|
||||||
|
* This function determines what instruction sets are enabled
|
||||||
|
* and selects the appropriate version at runtime.
|
||||||
|
*
|
||||||
|
* @param len Length of vector in bytes. Must be aligned to 32B.
|
||||||
|
* @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
|
||||||
|
* @param src Pointer to src data array. Must be aligned to 32B.
|
||||||
|
* @param dest Pointer to destination data array. Must be aligned to 32B.
|
||||||
|
* @returns 0 pass, other fail
|
||||||
|
*/
|
||||||
|
|
||||||
|
int gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Initialize 32-byte constant array for GF(2^8) vector multiply
|
||||||
|
*
|
||||||
|
* Calculates array {C{00}, C{01}, C{02}, ... , C{0f} }, {C{00}, C{10},
|
||||||
|
* C{20}, ... , C{f0} } as required by other fast vector multiply
|
||||||
|
* functions.
|
||||||
|
* @param c Constant input.
|
||||||
|
* @param gftbl Table output.
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_vect_mul_init(unsigned char c, unsigned char* gftbl);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief GF(2^8) vector multiply by constant, runs baseline version.
|
||||||
|
*
|
||||||
|
* Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
|
||||||
|
* is a single field element in GF(2^8). Can be used for RAID6 rebuild
|
||||||
|
* and partial write functions. Function requires pre-calculation of a
|
||||||
|
* 32-element constant array based on constant C. gftbl(C) = {C{00},
|
||||||
|
* C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }. Len
|
||||||
|
* and src must be aligned to 32B.
|
||||||
|
*
|
||||||
|
* @param len Length of vector in bytes. Must be aligned to 32B.
|
||||||
|
* @param a Pointer to 32-byte array of pre-calculated constants based on C.
|
||||||
|
* Only the 2nd element is used.
|
||||||
|
* @param src Pointer to src data array. Must be aligned to 32B.
|
||||||
|
* @param dest Pointer to destination data array. Must be aligned to 32B.
|
||||||
|
*/
|
||||||
|
|
||||||
|
void gf_vect_mul_base(int len, unsigned char *a, unsigned char *src,
|
||||||
|
unsigned char *dest);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif //_GF_VECT_MUL_H
|
96
erasure/include/reg-sizes.asm
Normal file
96
erasure/include/reg-sizes.asm
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
; Feature-detection bit masks. Each name records where the bit is expected to
; be tested: the EFLAGS register, one of the CPUID output registers
; (EAX/EBX/ECX/EDX), or the EAX result of XGETBV.

; EFLAGS bit 21.
%define EFLAGS_HAS_CPUID (1<<21)
|
||||||
|
; Masks named for CPUID leaf 1: ECX bits, plus one EDX bit (SSE2).
%define FLAG_CPUID1_ECX_CLMUL (1<<1)
|
||||||
|
%define FLAG_CPUID1_EDX_SSE2 (1<<26)
|
||||||
|
%define FLAG_CPUID1_ECX_SSE3 (1)
|
||||||
|
%define FLAG_CPUID1_ECX_SSE4_1 (1<<19)
|
||||||
|
%define FLAG_CPUID1_ECX_SSE4_2 (1<<20)
|
||||||
|
%define FLAG_CPUID1_ECX_POPCNT (1<<23)
|
||||||
|
%define FLAG_CPUID1_ECX_AESNI (1<<25)
|
||||||
|
%define FLAG_CPUID1_ECX_OSXSAVE (1<<27)
|
||||||
|
%define FLAG_CPUID1_ECX_AVX (1<<28)
|
||||||
|
; NOTE(review): AVX2 is normally reported by CPUID leaf 7 (EBX bit 5), not
; leaf 1 - the CPUID1 prefix looks like a misnomer. Confirm which leaf the
; detection code queries before reusing this mask.
%define FLAG_CPUID1_EBX_AVX2 (1<<5)
|
||||||
|
; 0x6 = bits 1 and 2 set; presumably the XMM and YMM state-enable bits
; returned by XGETBV - verify against the caller.
%define FLAG_XGETBV_EAX_XMM_YMM 0x6
|
||||||
|
|
||||||
|
; Compared against CPUID EAX (per the name); presumably the processor
; signature of "Avoton" parts - TODO confirm how the caller masks stepping bits.
%define FLAG_CPUID1_EAX_AVOTON 0x000406d0
|
||||||
|
|
||||||
|
; Define d (32-bit), w (16-bit) and b (8-bit) suffixed aliases for the
; general-purpose registers, and x (xmm) aliases for the ymm registers.
; The DWORD/WORD/BYTE/XWORD macros at the end of this block paste the suffix
; onto a register name with %+, so DWORD(rax) becomes raxd and then eax.
;
; NOTE(review): aliases exist only for rax, rbx, rcx, rdx, rsi, rdi, rbp and
; ymm0-ymm15. r8-r15 should still work because r8d/r8w/r8b are native
; register names, but DWORD(rsp) has no alias - confirm no caller needs it.
|
||||||
|
|
||||||
|
%define raxd eax
|
||||||
|
%define raxw ax
|
||||||
|
%define raxb al
|
||||||
|
|
||||||
|
%define rbxd ebx
|
||||||
|
%define rbxw bx
|
||||||
|
%define rbxb bl
|
||||||
|
|
||||||
|
%define rcxd ecx
|
||||||
|
%define rcxw cx
|
||||||
|
%define rcxb cl
|
||||||
|
|
||||||
|
%define rdxd edx
|
||||||
|
%define rdxw dx
|
||||||
|
%define rdxb dl
|
||||||
|
|
||||||
|
%define rsid esi
|
||||||
|
%define rsiw si
|
||||||
|
%define rsib sil
|
||||||
|
|
||||||
|
%define rdid edi
|
||||||
|
%define rdiw di
|
||||||
|
%define rdib dil
|
||||||
|
|
||||||
|
%define rbpd ebp
|
||||||
|
%define rbpw bp
|
||||||
|
%define rbpb bpl
|
||||||
|
|
||||||
|
; ymmNx -> xmmN: the 128-bit register matching each 256-bit register name.
%define ymm0x xmm0
|
||||||
|
%define ymm1x xmm1
|
||||||
|
%define ymm2x xmm2
|
||||||
|
%define ymm3x xmm3
|
||||||
|
%define ymm4x xmm4
|
||||||
|
%define ymm5x xmm5
|
||||||
|
%define ymm6x xmm6
|
||||||
|
%define ymm7x xmm7
|
||||||
|
%define ymm8x xmm8
|
||||||
|
%define ymm9x xmm9
|
||||||
|
%define ymm10x xmm10
|
||||||
|
%define ymm11x xmm11
|
||||||
|
%define ymm12x xmm12
|
||||||
|
%define ymm13x xmm13
|
||||||
|
%define ymm14x xmm14
|
||||||
|
%define ymm15x xmm15
|
||||||
|
|
||||||
|
; Size selectors: append the suffix to the register token, then let the
; aliases above (or the native register name) resolve the result.
%define DWORD(reg) reg %+ d
|
||||||
|
%define WORD(reg) reg %+ w
|
||||||
|
%define BYTE(reg) reg %+ b
|
||||||
|
|
||||||
|
; XWORD(ymmN) -> ymmNx -> xmmN
%define XWORD(reg) reg %+ x
|
205
erasure/make.inc
Normal file
205
erasure/make.inc
Normal file
@ -0,0 +1,205 @@
|
|||||||
|
########################################################################
|
||||||
|
# Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of Intel Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
|
||||||
|
# Makefile include for optimized libraries
|
||||||
|
# make targets:
|
||||||
|
# lib - build library of optimized functions
|
||||||
|
# slib - build shared library
|
||||||
|
# test - run unit tests of functions
|
||||||
|
# perf - run performance tests
|
||||||
|
# sim - run on simulator
|
||||||
|
# trace - get simulator trace
|
||||||
|
# clean - remove object files
|
||||||
|
|
||||||
|
CC = gcc
|
||||||
|
AS = yasm
|
||||||
|
SIM = sde $(SIMFLAGS) --
|
||||||
|
|
||||||
|
DEBUG = -g
|
||||||
|
DEBUG_yasm = -g dwarf2
|
||||||
|
DEBUG_nasm = -g
|
||||||
|
|
||||||
|
# Default arch= build options
|
||||||
|
CFLAGS_gcc = -Wall
|
||||||
|
ASFLAGS_ = -f elf64
|
||||||
|
ARFLAGS_ = cr $@
|
||||||
|
STRIP_gcc = strip -d -R .comment $@
|
||||||
|
STRIP_clang = strip -d $@
|
||||||
|
|
||||||
|
# arch=32 build options
|
||||||
|
ASFLAGS_32 = -f elf32
|
||||||
|
CFLAGS_32 = -m32
|
||||||
|
ARFLAGS_32 = cr $@
|
||||||
|
|
||||||
|
# arch=win64 build options
|
||||||
|
ASFLAGS_win64 = -f win64
|
||||||
|
CFLAGS_icl = -Qstd=c99
|
||||||
|
ARFLAGS_win64 = -out:$@
|
||||||
|
|
||||||
|
# arch=mingw build options
|
||||||
|
ASFLAGS_mingw = -f win64
|
||||||
|
ARFLAGS_mingw = cr $@
|
||||||
|
lsrcmingw = $(lsrc)
|
||||||
|
unit_testsmingw = $(unit_tests)
|
||||||
|
examplesmingw = $(examples)
|
||||||
|
perf_testsmingw = $(perf_tests)
|
||||||
|
|
||||||
|
ifeq ($(arch),mingw)
|
||||||
|
CC=x86_64-w64-mingw32-gcc
|
||||||
|
AR=x86_64-w64-mingw32-ar
|
||||||
|
LDFLAGS = -Wl,--force-exe-suffix
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
INCLUDE = $(patsubst %,-I%,$(subst :, ,$(VPATH)))
|
||||||
|
CFLAGS = $(CFLAGS_$(arch)) $(CFLAGS_$(CC)) $(DEBUG) -O2 $(DEFINES) $(INCLUDE)
|
||||||
|
ASFLAGS = $(ASFLAGS_$(arch)) $(ASFLAGS_$(CC)) $(DEBUG_$(AS)) $(DEFINES) $(INCLUDE)
|
||||||
|
ARFLAGS = $(ARFLAGS_$(arch))
|
||||||
|
DEFINES += $(addprefix -D , $D)
|
||||||
|
|
||||||
|
O = src
|
||||||
|
lobj += $(patsubst %.c,%.o,$(patsubst %.asm,%.o,$(lsrc$(arch))))
|
||||||
|
objs = $(addprefix $(O)/,$(lobj))
|
||||||
|
|
||||||
|
|
||||||
|
lib_name ?= isa-l.a
|
||||||
|
default: lib
|
||||||
|
|
||||||
|
# Defaults for windows build
|
||||||
|
ifeq ($(arch),win64)
|
||||||
|
AR=lib
|
||||||
|
CC=cl
|
||||||
|
OUTPUT_OPTION = -Fo$@
|
||||||
|
DEBUG=
|
||||||
|
lib_name := $(basename $(lib_name)).lib
|
||||||
|
endif
|
||||||
|
lsrcwin64 = $(lsrc)
|
||||||
|
unit_testswin64 = $(unit_tests)
|
||||||
|
exampleswin64 = $(examples)
|
||||||
|
perf_testswin64 = $(perf_tests)
|
||||||
|
|
||||||
|
# Build and run unit tests, performance tests, etc.
|
||||||
|
all_tests = $(sort $(perf_tests$(arch)) $(unit_tests$(arch)) $(examples$(arch)) $(other_tests))
|
||||||
|
|
||||||
|
$(sort $(unit_tests$(arch))): % : %.c $(tsrc$(arch)) $(lib_name)
|
||||||
|
$(sort $(perf_tests$(arch))): % : %.c $(lib_name)
|
||||||
|
$(sort $(examples$(arch))): % : %.c $(lib_name)
|
||||||
|
$(sort $(other_tests)): % : %.c $(lib_name)
|
||||||
|
|
||||||
|
sim test trace: $(addsuffix .run,$(unit_tests$(arch)))
|
||||||
|
perf: $(addsuffix .run,$(perf_tests$(arch)))
|
||||||
|
ex: $(examples$(arch))
|
||||||
|
all: lib $(all_tests)
|
||||||
|
other: $(other_tests)
|
||||||
|
tests: $(unit_tests$(arch))
|
||||||
|
perfs: $(perf_tests$(arch))
|
||||||
|
test perf: SIM=
|
||||||
|
trace: SIMFLAGS = -debugtrace
|
||||||
|
test sim:
|
||||||
|
@echo Finished running tests
|
||||||
|
|
||||||
|
$(objs): | $(O)
|
||||||
|
$(O): ; mkdir -p $(O)
|
||||||
|
|
||||||
|
|
||||||
|
# Build rule to run tests
|
||||||
|
%.run: %
|
||||||
|
$(SIM) $(@D)/$<
|
||||||
|
@echo Completed run: $<
|
||||||
|
|
||||||
|
# Other build rules
|
||||||
|
msg = $(if $(DEBUG),DEBUG) $(patsubst 32,32-bit,$(arch)) $D
|
||||||
|
|
||||||
|
$(O)/%.o: %.asm
|
||||||
|
@echo " ---> Building $< $(msg)"
|
||||||
|
@$(AS) $(ASFLAGS) -o $@ $<
|
||||||
|
|
||||||
|
$(O)/%.o %.o: %.c
|
||||||
|
@echo " ---> Building $< $(msg)"
|
||||||
|
@$(COMPILE.c) $(OUTPUT_OPTION) $<
|
||||||
|
|
||||||
|
$(all_tests):
|
||||||
|
@echo " ---> Building Test $@ $(msg)"
|
||||||
|
@$(LINK.o) $(CFLAGS) $^ $(LDLIBS) -o $@
|
||||||
|
|
||||||
|
|
||||||
|
# Target to build lib files
|
||||||
|
lib: $(lib_name)
|
||||||
|
ifneq ($(lib_debug),1)
|
||||||
|
$(lib_name): DEBUG_$(AS)= # Don't put debug symbols in the lib
|
||||||
|
$(lib_name): DEBUG=
|
||||||
|
$(lib_name): DEFINES+=-D NDEBUG
|
||||||
|
endif
|
||||||
|
ifeq ($(lib_debug),1)
|
||||||
|
DEBUG+=-D DEBUG # Define DEBUG for macros
|
||||||
|
endif
|
||||||
|
|
||||||
|
#lib $(lib_name): $(lib_name)(${objs})
|
||||||
|
$(lib_name): $(objs)
|
||||||
|
@echo " ---> Creating Lib $@"
|
||||||
|
@$(AR) $(ARFLAGS) $^
|
||||||
|
@$(STRIP_$(CC)) $^
|
||||||
|
|
||||||
|
# Target for shared lib
|
||||||
|
so_lib_name ?= $(basename $(lib_name)).so
|
||||||
|
slib: $(so_lib_name)
|
||||||
|
aobjs += $(addprefix $(O)/,$(patsubst %.asm,%.o,$(filter %.asm,$(lsrc$(arch)))))
|
||||||
|
shared_objs += $(addprefix $(O)/shared_ver_,$(patsubst %.c,%.o,$(filter %.c,$(lsrc$(arch)))))
|
||||||
|
|
||||||
|
$(O)/shared_ver_%.o: %.c
|
||||||
|
@echo " ---> Building shared $< $(msg)"
|
||||||
|
@$(COMPILE.c) $(OUTPUT_OPTION) $<
|
||||||
|
|
||||||
|
ifneq ($(lib_debug),1)
|
||||||
|
$(so_lib_name): DEBUG_$(AS)=
|
||||||
|
$(so_lib_name): DEBUG=
|
||||||
|
$(so_lib_name): DEFINES+=-D NDEBUG
|
||||||
|
endif
|
||||||
|
|
||||||
|
$(shared_objs): CFLAGS += -fPIC
|
||||||
|
$(shared_objs) $(aobjs): | $(O)
|
||||||
|
$(so_lib_name): $(shared_objs) $(aobjs)
|
||||||
|
@echo " ---> Creating Shared Lib $@"
|
||||||
|
@$(CC) $(CFLAGS) -shared $(LDFLAGS) -o $@ $^
|
||||||
|
@$(STRIP_$(CC)) $^
|
||||||
|
|
||||||
|
# Collect performance data
|
||||||
|
rpt_name = perf_report_$(shell uname -n)_$(shell date +%y%m%d).perf
|
||||||
|
|
||||||
|
perf_report:
|
||||||
|
echo Results for $(rpt_name) >> $(rpt_name)
|
||||||
|
$(MAKE) -k perf | tee -a $(rpt_name)
|
||||||
|
@echo Summary:
|
||||||
|
-grep runtime $(rpt_name)
|
||||||
|
|
||||||
|
|
||||||
|
clean:
|
||||||
|
@echo Cleaning up
|
||||||
|
@$(RM) -r $(O)/*.o *.a $(all_tests) $(lib_name) $(so_lib_name)
|
107
erasure/src/Makefile
Normal file
107
erasure/src/Makefile
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
########################################################################
|
||||||
|
# Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# * Neither the name of Intel Corporation nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
########################################################################
|
||||||
|
|
||||||
|
|
||||||
|
lib_name := erasure_code.a
|
||||||
|
|
||||||
|
lsrc += ec-highlevel-func.c \
|
||||||
|
ec-base.c \
|
||||||
|
gf-vect-mul-sse.asm \
|
||||||
|
gf-vect-mul-avx.asm \
|
||||||
|
gf-vect-dot-prod-sse.asm \
|
||||||
|
gf-vect-dot-prod-avx.asm \
|
||||||
|
gf-vect-dot-prod-avx2.asm \
|
||||||
|
gf-2vect-dot-prod-sse.asm \
|
||||||
|
gf-3vect-dot-prod-sse.asm \
|
||||||
|
gf-4vect-dot-prod-sse.asm \
|
||||||
|
gf-5vect-dot-prod-sse.asm \
|
||||||
|
gf-6vect-dot-prod-sse.asm \
|
||||||
|
gf-2vect-dot-prod-avx.asm \
|
||||||
|
gf-3vect-dot-prod-avx.asm \
|
||||||
|
gf-4vect-dot-prod-avx.asm \
|
||||||
|
gf-5vect-dot-prod-avx.asm \
|
||||||
|
gf-6vect-dot-prod-avx.asm \
|
||||||
|
gf-2vect-dot-prod-avx2.asm \
|
||||||
|
gf-3vect-dot-prod-avx2.asm \
|
||||||
|
gf-4vect-dot-prod-avx2.asm \
|
||||||
|
gf-5vect-dot-prod-avx2.asm \
|
||||||
|
gf-6vect-dot-prod-avx2.asm \
|
||||||
|
ec-multibinary.asm
|
||||||
|
|
||||||
|
lsrc32 += ec-highlevel-func.c ec-multibinary.asm ec-base.c
|
||||||
|
|
||||||
|
unit_tests32 += erasure-code-base-test erasure-code-test \
|
||||||
|
gf-vect-mul-test gf-vect-mul-base-test \
|
||||||
|
gf-vect-dot-prod-base-test gf-vect-dot-prod-test
|
||||||
|
|
||||||
|
perf_tests32 += gf-vect-mul-perf gf-vect-dot-prod-perf erasure-code-perf \
|
||||||
|
erasure-code-base-perf gf-vect-dot-prod-1tbl
|
||||||
|
|
||||||
|
# Headers exported alongside the library.
# Names must match the files on disk: ec-base.c includes "ec-base.h" and
# "erasure/types.h", so the list uses the dashed header name and the
# correctly spelled "erasure/" directory.
extern_hdrs += erasure-code.h ec-base.h gf-vect-mul.h \
	erasure/tests.h erasure/types.h
|
||||||
|
|
||||||
|
unit_tests += gf-vect-mul-test \
|
||||||
|
gf-vect-mul-sse-test \
|
||||||
|
gf-vect-mul-avx-test \
|
||||||
|
gf-vect-mul-base-test \
|
||||||
|
gf-vect-dot-prod-sse-test \
|
||||||
|
gf-vect-dot-prod-avx-test \
|
||||||
|
gf-2vect-dot-prod-sse-test \
|
||||||
|
gf-3vect-dot-prod-sse-test \
|
||||||
|
gf-4vect-dot-prod-sse-test \
|
||||||
|
gf-5vect-dot-prod-sse-test \
|
||||||
|
gf-6vect-dot-prod-sse-test \
|
||||||
|
gf-inverse-test \
|
||||||
|
gf-vect-dot-prod-base-test \
|
||||||
|
gf-vect-dot-prod-test \
|
||||||
|
erasure-code-test \
|
||||||
|
erasure-code-base-test \
|
||||||
|
erasure-code-sse-test
|
||||||
|
|
||||||
|
perf_tests += gf-vect-mul-perf \
|
||||||
|
gf-vect-mul-sse-perf \
|
||||||
|
gf-vect-mul-avx-perf \
|
||||||
|
gf-vect-dot-prod-sse-perf \
|
||||||
|
gf-vect-dot-prod-avx-perf \
|
||||||
|
gf-2vect-dot-prod-sse-perf \
|
||||||
|
gf-3vect-dot-prod-sse-perf \
|
||||||
|
gf-4vect-dot-prod-sse-perf \
|
||||||
|
gf-5vect-dot-prod-sse-perf \
|
||||||
|
gf-6vect-dot-prod-sse-perf \
|
||||||
|
gf-vect-dot-prod-perf \
|
||||||
|
gf-vect-dot-prod-1tbl \
|
||||||
|
erasure-code-perf \
|
||||||
|
erasure-code-base-perf \
|
||||||
|
erasure-code-sse-perf
|
||||||
|
|
||||||
|
other_src += reg-sizes.asm
|
||||||
|
|
||||||
|
VPATH = .. ../include
|
||||||
|
|
||||||
|
-include ../make.inc
|
320
erasure/src/ec-base.c
Normal file
320
erasure/src/ec-base.c
Normal file
@ -0,0 +1,320 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <limits.h>
|
||||||
|
#include <string.h> // for memset
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "ec-base.h" // for GF tables
|
||||||
|
#include "erasure/types.h"
|
||||||
|
|
||||||
|
unsigned char gf_mul(unsigned char a, unsigned char b)
|
||||||
|
{
|
||||||
|
#ifndef GF_LARGE_TABLES
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if ((a == 0) || (b == 0))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return gff_base[(i = gflog_base[a] + gflog_base[b]) > 254 ? i - 255 : i];
|
||||||
|
#else
|
||||||
|
return gf_mul_table_base[b * 256 + a];
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned char gf_inv(unsigned char a)
|
||||||
|
{
|
||||||
|
#ifndef GF_LARGE_TABLES
|
||||||
|
if (a == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return gff_base[255 - gflog_base[a]];
|
||||||
|
#else
|
||||||
|
return gf_inv_table_base[a];
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill the m x k row-major matrix `a` (k bytes per row, m rows).
//
// Rows 0..k-1 become the k x k identity.  Each following row holds successive
// powers 1, g, g^2, ... of a per-row generator g (products taken with
// gf_mul); g starts at 1 for row k and is multiplied by 2 after every row.
//
// a: output buffer of at least k * m bytes.
// m: total number of rows.
// k: number of columns (and of identity rows).
void gf_gen_rs_matrix(unsigned char *a, int m, int k)
{
	int row, col;
	unsigned char term;
	unsigned char base = 1;

	memset(a, 0, k * m);
	for (row = 0; row < k; row++)
		a[k * row + row] = 1;

	for (row = k; row < m; row++, base = gf_mul(base, 2)) {
		term = 1;
		for (col = 0; col < k; col++, term = gf_mul(term, base))
			a[k * row + col] = term;
	}
}
|
||||||
|
|
||||||
|
// Fill the m x k row-major matrix `a` (k bytes per row, m rows).
//
// Rows 0..k-1 become the k x k identity.  Every remaining entry at
// (row, col), with row >= k, is set to gf_inv(row ^ col).
//
// a: output buffer of at least k * m bytes.
// m: total number of rows.
// k: number of columns (and of identity rows).
void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k)
{
	int row, col;

	// Identity block first
	memset(a, 0, k * m);
	for (row = 0; row < k; row++)
		a[k * row + row] = 1;

	// Remaining rows: inverse of (row XOR col); row != col here since row >= k > col
	for (row = k; row < m; row++)
		for (col = 0; col < k; col++)
			a[k * row + col] = gf_inv(row ^ col);
}
|
||||||
|
|
||||||
|
// Invert an n x n row-major matrix by Gauss-Jordan elimination, using gf_mul
// and gf_inv for the arithmetic and XOR for addition.
//
// in_mat:  matrix to invert; it is overwritten while the elimination runs.
// out_mat: receives the inverse; it is initialised to the identity here.
// n:       matrix dimension.
//
// Returns 0 on success, or -1 when no non-zero pivot can be found for some
// column (the matrix is singular).  On failure both buffers are left in a
// partially processed state.
int gf_invert_matrix(unsigned char *in_mat, unsigned char *out_mat, const int n)
{
	int piv, row, col;
	unsigned char factor, held;

	// out_mat := identity
	memset(out_mat, 0, n * n);
	for (piv = 0; piv < n; piv++)
		out_mat[piv * n + piv] = 1;

	for (piv = 0; piv < n; piv++) {
		unsigned char *in_piv = in_mat + piv * n;
		unsigned char *out_piv = out_mat + piv * n;

		if (in_piv[piv] == 0) {
			unsigned char *in_alt, *out_alt;

			// Look below for a row with a non-zero entry in this column
			row = piv + 1;
			while (row < n && in_mat[row * n + piv] == 0)
				row++;

			if (row == n)	// nothing usable: singular
				return -1;

			in_alt = in_mat + row * n;
			out_alt = out_mat + row * n;
			for (col = 0; col < n; col++) {	// exchange rows piv and row
				held = in_piv[col];
				in_piv[col] = in_alt[col];
				in_alt[col] = held;

				held = out_piv[col];
				out_piv[col] = out_alt[col];
				out_alt[col] = held;
			}
		}

		// Normalise the pivot row so that the pivot becomes 1
		factor = gf_inv(in_piv[piv]);
		for (col = 0; col < n; col++) {
			in_piv[col] = gf_mul(in_piv[col], factor);
			out_piv[col] = gf_mul(out_piv[col], factor);
		}

		// Clear this column in every other row
		for (row = 0; row < n; row++) {
			unsigned char *in_row, *out_row;

			if (row == piv)
				continue;

			in_row = in_mat + row * n;
			out_row = out_mat + row * n;

			// Capture the multiplier first: in_row[piv] is itself
			// rewritten inside the loop below.
			factor = in_row[piv];
			for (col = 0; col < n; col++) {
				out_row[col] ^= gf_mul(factor, out_piv[col]);
				in_row[col] ^= gf_mul(factor, in_piv[col]);
			}
		}
	}

	return 0;
}
|
||||||
|
|
||||||
|
// Calculates const table gftbl in GF(2^8) from single input A
// gftbl(A) = {A{00}, A{01}, A{02}, ... , A{0f} }, {A{00}, A{10}, A{20}, ... , A{f0} }
//
// c:   the constant multiplier.
// tbl: output buffer of 32 bytes.  tbl[i] (i = 0..15) receives c times the
//      low-nibble value i, and tbl[16 + i] receives c times the high-nibble
//      value i << 4.  Consequently tbl[0] == tbl[16] == 0 and tbl[1] == c.
//
// The table is built byte by byte, so tbl needs no particular alignment and
// the result does not depend on host byte order.  (The earlier 64-bit path
// stored through an `unsigned long long *` alias of tbl, which relied on a
// little-endian host and on tbl being suitably aligned.)
void gf_vect_mul_init(unsigned char c, unsigned char *tbl)
{
	unsigned char pw[8];	// pw[b] = c multiplied by 2, b times
	int i, b;

	pw[0] = c;
	for (b = 1; b < 8; b++) {
		unsigned char prev = pw[b - 1];

		// Mult by GF{2}: shift left, fold with 0x1d when the top bit falls out
		pw[b] = (unsigned char)((prev << 1) ^ ((prev & 0x80) ? 0x1d : 0));
	}

	for (i = 0; i < 16; i++) {
		unsigned char lo = 0, hi = 0;

		// Multiplication distributes over XOR, so each entry is the XOR
		// of the shifted multiples selected by the set bits of i.
		for (b = 0; b < 4; b++) {
			if (i & (1 << b)) {
				lo ^= pw[b];
				hi ^= pw[b + 4];
			}
		}

		tbl[i] = lo;
		tbl[16 + i] = hi;
	}
}
|
||||||
|
|
||||||
|
// Plain-C dot product of vlen source buffers with vlen coefficients.
//
// For each byte position i in 0..len-1, dest[i] is the XOR over j of
// gf_mul(src[j][i], v[j * 32 + 1]).
//
// len:  number of bytes in every source buffer and in dest.
// vlen: number of source buffers.
// v:    coefficient tables, 32 bytes per source; only byte 1 of each table
//       (the multiplier itself) is read here.
// src:  array of vlen source pointers.
// dest: output buffer of len bytes.
void gf_vect_dot_prod_base(int len, int vlen, unsigned char *v,
			   unsigned char **src, unsigned char *dest)
{
	int pos, vec;

	for (pos = 0; pos < len; pos++) {
		unsigned char acc = 0;

		for (vec = 0; vec < vlen; vec++) {
			unsigned char coef = v[vec * 32 + 1];

			acc ^= gf_mul(src[vec][pos], coef);
		}

		dest[pos] = acc;
	}
}
|
||||||
|
|
||||||
|
// Plain-C encoder: produce `dests` output buffers from `srcs` input buffers.
//
// For output l and byte position i, dest[l][i] is the XOR over j of
// gf_mul(src[j][i], v[j * 32 + l * srcs * 32 + 1]).
//
// len:   number of bytes in every buffer.
// srcs:  number of source buffers.
// dests: number of output buffers.
// v:     coefficient tables, 32 bytes per (output, source) pair laid out
//        output-major; only byte 1 of each table is read here.
// src:   array of srcs source pointers.
// dest:  array of dests output pointers.
void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v,
			 unsigned char **src, unsigned char **dest)
{
	int out, pos, in;

	for (out = 0; out < dests; out++) {
		unsigned char *target = dest[out];

		for (pos = 0; pos < len; pos++) {
			unsigned char acc = 0;

			for (in = 0; in < srcs; in++)
				acc ^= gf_mul(src[in][pos], v[in * 32 + out * srcs * 32 + 1]);

			target[pos] = acc;
		}
	}
}
|
||||||
|
|
||||||
|
// Plain-C multiply of a buffer by a constant: dest[i] = gf_mul(c, src[i]).
//
// len:  number of bytes to process; nothing is written when len <= 0.
// a:    32-byte multiplier table; only a[1] is read, which holds the
//       constant the table was generated from.
// src:  input buffer of len bytes.
// dest: output buffer of len bytes.
void gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest)
{
	const unsigned char coef = a[1];
	int i;

	for (i = 0; i < len; i++)
		dest[i] = gf_mul(coef, src[i]);
}
|
||||||
|
|
||||||
|
struct slver {
|
||||||
|
UINT16 snum;
|
||||||
|
UINT8 ver;
|
||||||
|
UINT8 core;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Version info
|
||||||
|
struct slver gf_vect_mul_init_slver_00020035;
|
||||||
|
struct slver gf_vect_mul_init_slver = { 0x0035, 0x02, 0x00 };
|
||||||
|
|
||||||
|
struct slver ec_encode_data_base_slver_00010135;
|
||||||
|
struct slver ec_encode_data_base_slver = { 0x0135, 0x01, 0x00 };
|
||||||
|
|
||||||
|
struct slver gf_vect_mul_base_slver_00010136;
|
||||||
|
struct slver gf_vect_mul_base_slver = { 0x0136, 0x01, 0x00 };
|
||||||
|
|
||||||
|
struct slver gf_vect_dot_prod_base_slver_00010137;
|
||||||
|
struct slver gf_vect_dot_prod_base_slver = { 0x0137, 0x01, 0x00 };
|
152
erasure/src/ec-highlevel-func.c
Normal file
152
erasure/src/ec-highlevel-func.c
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
#include <limits.h>
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/types.h"
|
||||||
|
|
||||||
|
// Expand a rows x k coefficient matrix into per-coefficient multiply tables.
//
// The coefficients in `a` are consumed in row-major order; for each one,
// gf_vect_mul_init() writes a 32-byte table, and the tables are laid out
// back to back in g_tbls.
//
// k:      number of coefficients per row.
// rows:   number of rows.
// a:      input matrix of rows * k bytes.
// g_tbls: output buffer of rows * k * 32 bytes.
void ec_init_tables(int k, int rows, unsigned char *a, unsigned char *g_tbls)
{
	int r, c;

	for (r = 0; r < rows; r++) {
		for (c = 0; c < k; c++, g_tbls += 32)
			gf_vect_mul_init(*a++, g_tbls);
	}
}
|
||||||
|
|
||||||
|
#if __WORDSIZE == 64 || _WIN64 || __x86_64__
|
||||||
|
// Encode `rows` output buffers from k source buffers using the SSE kernels.
//
// Buffers shorter than 16 bytes are handed to ec_encode_data_base().
// Otherwise outputs are produced four at a time with gf_4vect_dot_prod_sse(),
// advancing g_tbls by 4 * k * 32 bytes and coding by four pointers per call;
// the remaining 3, 2 or 1 outputs use the matching narrower kernel.
//
// len:    number of bytes in every buffer.
// k:      number of source buffers.
// rows:   number of output buffers.
// g_tbls: multiply tables, 32 bytes per (output, source) pair.
// data:   array of k source pointers.
// coding: array of rows output pointers.
void ec_encode_data_sse(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
			unsigned char **coding)
{
	if (len < 16) {
		ec_encode_data_base(len, k, rows, g_tbls, data, coding);
		return;
	}

	for (; rows >= 4; rows -= 4) {
		gf_4vect_dot_prod_sse(len, k, g_tbls, data, coding);
		g_tbls += 4 * k * 32;
		coding += 4;
	}

	// Leftover outputs (fewer than four)
	if (rows == 3)
		gf_3vect_dot_prod_sse(len, k, g_tbls, data, coding);
	else if (rows == 2)
		gf_2vect_dot_prod_sse(len, k, g_tbls, data, coding);
	else if (rows == 1)
		gf_vect_dot_prod_sse(len, k, g_tbls, data, *coding);
}
|
||||||
|
|
||||||
|
// Encode `rows` output buffers from k source buffers using the AVX kernels.
//
// Buffers shorter than 16 bytes are handed to ec_encode_data_base().
// Otherwise outputs are produced four at a time with gf_4vect_dot_prod_avx(),
// advancing g_tbls by 4 * k * 32 bytes and coding by four pointers per call;
// the remaining 3, 2 or 1 outputs use the matching narrower kernel.
//
// len:    number of bytes in every buffer.
// k:      number of source buffers.
// rows:   number of output buffers.
// g_tbls: multiply tables, 32 bytes per (output, source) pair.
// data:   array of k source pointers.
// coding: array of rows output pointers.
void ec_encode_data_avx(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
			unsigned char **coding)
{
	if (len < 16) {
		ec_encode_data_base(len, k, rows, g_tbls, data, coding);
		return;
	}

	for (; rows >= 4; rows -= 4) {
		gf_4vect_dot_prod_avx(len, k, g_tbls, data, coding);
		g_tbls += 4 * k * 32;
		coding += 4;
	}

	// Leftover outputs (fewer than four)
	if (rows == 3)
		gf_3vect_dot_prod_avx(len, k, g_tbls, data, coding);
	else if (rows == 2)
		gf_2vect_dot_prod_avx(len, k, g_tbls, data, coding);
	else if (rows == 1)
		gf_vect_dot_prod_avx(len, k, g_tbls, data, *coding);
}
|
||||||
|
|
||||||
|
// Encode `rows` output buffers from k source buffers using the AVX2 kernels.
//
// Buffers shorter than 32 bytes are handed to ec_encode_data_base().
// Otherwise outputs are produced four at a time with
// gf_4vect_dot_prod_avx2(), advancing g_tbls by 4 * k * 32 bytes and coding
// by four pointers per call; the remaining 3, 2 or 1 outputs use the matching
// narrower kernel.
//
// len:    number of bytes in every buffer.
// k:      number of source buffers.
// rows:   number of output buffers.
// g_tbls: multiply tables, 32 bytes per (output, source) pair.
// data:   array of k source pointers.
// coding: array of rows output pointers.
void ec_encode_data_avx2(int len, int k, int rows, unsigned char *g_tbls, unsigned char **data,
			 unsigned char **coding)
{
	if (len < 32) {
		ec_encode_data_base(len, k, rows, g_tbls, data, coding);
		return;
	}

	for (; rows >= 4; rows -= 4) {
		gf_4vect_dot_prod_avx2(len, k, g_tbls, data, coding);
		g_tbls += 4 * k * 32;
		coding += 4;
	}

	// Leftover outputs (fewer than four)
	if (rows == 3)
		gf_3vect_dot_prod_avx2(len, k, g_tbls, data, coding);
	else if (rows == 2)
		gf_2vect_dot_prod_avx2(len, k, g_tbls, data, coding);
	else if (rows == 1)
		gf_vect_dot_prod_avx2(len, k, g_tbls, data, *coding);
}
|
||||||
|
|
||||||
|
#endif //__WORDSIZE == 64 || _WIN64 || __x86_64__
|
||||||
|
|
||||||
|
struct slver {
|
||||||
|
UINT16 snum;
|
||||||
|
UINT8 ver;
|
||||||
|
UINT8 core;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Version info
|
||||||
|
struct slver ec_init_tables_slver_00010068;
|
||||||
|
struct slver ec_init_tables_slver = { 0x0068, 0x01, 0x00 };
|
||||||
|
|
||||||
|
struct slver ec_encode_data_sse_slver_00020069;
|
||||||
|
struct slver ec_encode_data_sse_slver = { 0x0069, 0x02, 0x00 };
|
266
erasure/src/ec-multibinary.asm
Normal file
266
erasure/src/ec-multibinary.asm
Normal file
@ -0,0 +1,266 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;; WRT_OPT is appended to the external symbol operands of the lea
;;; instructions in the 64-bit dispatch code of this file.  It expands to
;;; "wrt ..plt" when assembling for elf64 and to nothing for any other
;;; output format.
%ifnidn __OUTPUT_FORMAT__, elf64
 %define WRT_OPT
%else
 %define WRT_OPT	wrt ..plt
%endif
|
||||||
|
|
||||||
|
;;; Word-size dependent definitions used by the dispatch code:
;;;   def_wrd - data directive that emits one pointer-sized word
;;;   wrd_sz  - operand size keyword for the indirect jump
;;;   arg1    - scratch register that carries the selected function address
%ifnidn __OUTPUT_FORMAT__, elf32

;; Every output format other than elf32 is assembled as 64-bit code.
 %include "reg-sizes.asm"	; presumably supplies the FLAG_* constants tested below - not visible here
 default rel
 [bits 64]

 %define arg1		rsi
 %define wrd_sz		qword
 %define def_wrd	dq

;; SIMD implementations are only referenced by the 64-bit build.
 extern ec_encode_data_sse
 extern ec_encode_data_avx
 extern ec_encode_data_avx2
 extern gf_vect_mul_sse
 extern gf_vect_mul_avx
 extern gf_vect_dot_prod_sse
 extern gf_vect_dot_prod_avx
 extern gf_vect_dot_prod_avx2

%else

;; elf32: 32-bit code, only the base implementations are selectable.
 [bits 32]

 %define arg1		esi
 %define wrd_sz		dword
 %define def_wrd	dd

%endif

;; Base implementations, referenced by both builds as the default choice.
extern ec_encode_data_base
extern gf_vect_mul_base
extern gf_vect_dot_prod_base
|
||||||
|
|
||||||
|
section .data

;;; One pointer-sized slot per multibinary function.  Each slot starts out
;;; holding the address of the matching *_mbinit stub; the *_dispatch_init
;;; routine stores the selected implementation into the slot, so the stub
;;; is only reached until that store has happened.

ec_encode_data_dispatched:	def_wrd	ec_encode_data_mbinit

gf_vect_mul_dispatched:		def_wrd	gf_vect_mul_mbinit

gf_vect_dot_prod_dispatched:	def_wrd	gf_vect_dot_prod_mbinit
|
||||||
|
|
||||||
|
section .text

;;;;
; ec_encode_data multibinary function
;
; ec_encode_data jumps through ec_encode_data_dispatched.  That slot
; initially points at ec_encode_data_mbinit, which calls
; ec_encode_data_dispatch_init to store the chosen implementation and then
; falls through into the same indirect jump.
;
; Selection made by ec_encode_data_dispatch_init:
;   elf32 build : always ec_encode_data_base
;   other builds: ec_encode_data_base by default;
;                 ec_encode_data_sse  if FLAG_CPUID1_ECX_SSE4_1 is set;
;                 ec_encode_data_avx  if both the AVX and OSXSAVE flags are set;
;                 ec_encode_data_avx2 if, in addition, FLAG_CPUID1_EBX_AVX2
;                 is set in ebx of CPUID leaf 7;
;                 back to ec_encode_data_sse if xgetbv does not report
;                 both bits of FLAG_XGETBV_EAX_XMM_YMM.
;;;;
global ec_encode_data:function

ec_encode_data_mbinit:
	call	ec_encode_data_dispatch_init
	;; no ret here: execution continues into ec_encode_data

ec_encode_data:
	jmp	wrd_sz [ec_encode_data_dispatched]

ec_encode_data_dispatch_init:
	push	arg1				; arg1 is reused as scratch, restored before ret
%ifidn __OUTPUT_FORMAT__, elf32			;; 32-bit check
	lea	arg1, [ec_encode_data_base]
%else
	;; cpuid and xgetbv overwrite these registers; keep the caller's values
	push	rax
	push	rbx
	push	rcx
	push	rdx

	lea	arg1, [ec_encode_data_base WRT_OPT]	; Default

	mov	eax, 1
	cpuid
	lea	rbx, [ec_encode_data_sse WRT_OPT]
	test	ecx, FLAG_CPUID1_ECX_SSE4_1
	cmovne	arg1, rbx

	;; Try for AVX: both flags must be present (lea leaves the flags alone)
	lea	rbx, [ec_encode_data_avx WRT_OPT]
	and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	jne	_done_ec_encode_data_init
	mov	arg1, rbx

	;; Try for AVX2
	mov	eax, 7
	xor	ecx, ecx
	cpuid
	test	ebx, FLAG_CPUID1_EBX_AVX2
	lea	rbx, [ec_encode_data_avx2 WRT_OPT]	; after the test: this overwrites ebx
	cmovne	arg1, rbx

	;; Does it have xmm and ymm support
	xor	ecx, ecx
	xgetbv
	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	je	_done_ec_encode_data_init
	lea	arg1, [ec_encode_data_sse WRT_OPT]	; not both bits set: use sse instead

_done_ec_encode_data_init:
	pop	rdx
	pop	rcx
	pop	rbx
	pop	rax
%endif					;; END 32-bit check
	mov	[ec_encode_data_dispatched], arg1
	pop	arg1
	ret
|
||||||
|
|
||||||
|
;;;;
; gf_vect_mul multibinary function
;
; gf_vect_mul jumps through gf_vect_mul_dispatched.  That slot initially
; points at gf_vect_mul_mbinit, which calls gf_vect_mul_dispatch_init to
; store the chosen implementation and then falls through into the same
; indirect jump.
;
; Selection made by gf_vect_mul_dispatch_init:
;   elf32 build : always gf_vect_mul_base
;   other builds: gf_vect_mul_base by default;
;                 gf_vect_mul_sse if FLAG_CPUID1_ECX_SSE4_2 is set;
;                 gf_vect_mul_avx if, in addition, the AVX and OSXSAVE
;                 flags are set and xgetbv reports both bits of
;                 FLAG_XGETBV_EAX_XMM_YMM.
;;;;
global gf_vect_mul:function

gf_vect_mul_mbinit:
	call	gf_vect_mul_dispatch_init
	;; no ret here: execution continues into gf_vect_mul

gf_vect_mul:
	jmp	wrd_sz [gf_vect_mul_dispatched]

gf_vect_mul_dispatch_init:
	push	arg1				; arg1 is reused as scratch, restored before ret
%ifidn __OUTPUT_FORMAT__, elf32			;; 32-bit check
	lea	arg1, [gf_vect_mul_base]
%else
	;; cpuid and xgetbv overwrite these registers; keep the caller's values
	push	rax
	push	rbx
	push	rcx
	push	rdx

	lea	arg1, [gf_vect_mul_base WRT_OPT]	; Default

	mov	eax, 1
	cpuid
	test	ecx, FLAG_CPUID1_ECX_SSE4_2
	je	_done_gf_vect_mul_dispatch_init
	lea	arg1, [gf_vect_mul_sse WRT_OPT]

	;; Try for AVX
	and	ecx, (FLAG_CPUID1_ECX_OSXSAVE | FLAG_CPUID1_ECX_AVX)
	cmp	ecx, (FLAG_CPUID1_ECX_OSXSAVE | FLAG_CPUID1_ECX_AVX)
	jne	_done_gf_vect_mul_dispatch_init

	;; Does it have xmm and ymm support
	xor	ecx, ecx
	xgetbv
	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	jne	_done_gf_vect_mul_dispatch_init
	lea	arg1, [gf_vect_mul_avx WRT_OPT]

_done_gf_vect_mul_dispatch_init:
	pop	rdx
	pop	rcx
	pop	rbx
	pop	rax
%endif					;; END 32-bit check
	mov	[gf_vect_mul_dispatched], arg1
	pop	arg1
	ret
|
||||||
|
|
||||||
|
|
||||||
|
;;;;
; gf_vect_dot_prod multibinary function
;
; gf_vect_dot_prod jumps through gf_vect_dot_prod_dispatched.  That slot
; initially points at gf_vect_dot_prod_mbinit, which calls
; gf_vect_dot_prod_dispatch_init to store the chosen implementation and
; then falls through into the same indirect jump.
;
; Selection made by gf_vect_dot_prod_dispatch_init:
;   elf32 build : always gf_vect_dot_prod_base
;   other builds: gf_vect_dot_prod_base by default;
;                 gf_vect_dot_prod_sse  if FLAG_CPUID1_ECX_SSE4_1 is set;
;                 gf_vect_dot_prod_avx  if both the AVX and OSXSAVE flags are set;
;                 gf_vect_dot_prod_avx2 if, in addition, FLAG_CPUID1_EBX_AVX2
;                 is set in ebx of CPUID leaf 7;
;                 back to gf_vect_dot_prod_sse if xgetbv does not report
;                 both bits of FLAG_XGETBV_EAX_XMM_YMM.
;;;;
global gf_vect_dot_prod:function

gf_vect_dot_prod_mbinit:
	call	gf_vect_dot_prod_dispatch_init
	;; no ret here: execution continues into gf_vect_dot_prod

gf_vect_dot_prod:
	jmp	wrd_sz [gf_vect_dot_prod_dispatched]

gf_vect_dot_prod_dispatch_init:
	push	arg1				; arg1 is reused as scratch, restored before ret
%ifidn __OUTPUT_FORMAT__, elf32			;; 32-bit check
	lea	arg1, [gf_vect_dot_prod_base]
%else
	;; cpuid and xgetbv overwrite these registers; keep the caller's values
	push	rax
	push	rbx
	push	rcx
	push	rdx

	lea	arg1, [gf_vect_dot_prod_base WRT_OPT]	; Default

	mov	eax, 1
	cpuid
	lea	rbx, [gf_vect_dot_prod_sse WRT_OPT]
	test	ecx, FLAG_CPUID1_ECX_SSE4_1
	cmovne	arg1, rbx

	;; Try for AVX: both flags must be present (lea leaves the flags alone)
	lea	rbx, [gf_vect_dot_prod_avx WRT_OPT]
	and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	jne	_done_gf_vect_dot_prod_init
	mov	arg1, rbx

	;; Try for AVX2
	mov	eax, 7
	xor	ecx, ecx
	cpuid
	test	ebx, FLAG_CPUID1_EBX_AVX2
	lea	rbx, [gf_vect_dot_prod_avx2 WRT_OPT]	; after the test: this overwrites ebx
	cmovne	arg1, rbx

	;; Does it have xmm and ymm support
	xor	ecx, ecx
	xgetbv
	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	je	_done_gf_vect_dot_prod_init
	lea	arg1, [gf_vect_dot_prod_sse WRT_OPT]	; not both bits set: use sse instead

_done_gf_vect_dot_prod_init:
	pop	rdx
	pop	rcx
	pop	rbx
	pop	rax
%endif					;; END 32-bit check
	mov	[gf_vect_dot_prod_dispatched], arg1
	pop	arg1
	ret
|
||||||
|
|
||||||
|
;;; slversion <func>, <core>, <ver>, <snum>
;;;
;;; Emits a 4-byte version record for <func> in whatever section is active
;;; at the point of use: a 16-bit word 0x<snum>, then the bytes 0x<ver>
;;; and 0x<core>.  Two global labels name the same record:
;;; <func>_slver and <func>_slver_<core><ver><snum>.
%macro slversion 4
	global %1_slver
	global %1_slver_%2%3%4
%1_slver_%2%3%4:
%1_slver:
	dw 0x%4
	db 0x%3, 0x%2
%endmacro

;;;       func              core, ver, snum
slversion ec_encode_data,   00,   02,  0133
slversion gf_vect_mul,      00,   02,  0134
slversion gf_vect_dot_prod, 00,   01,  0138
|
168
erasure/src/erasure-code-base-perf.c
Normal file
168
erasure/src/erasure-code-base-perf.c
Normal file
@ -0,0 +1,168 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
COPYRIGHT(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/tests.h"
|
||||||
|
|
||||||
|
/*
 * Test configuration.
 *
 * Three mutually exclusive setups are selected at compile time:
 *   CACHED_TEST defined  - small data set looped many times ("_warm")
 *   neither defined      - data set larger than GT_L3_CACHE ("_cold")
 *   TEST_CUSTOM defined  - only TEST_TYPE_STR and a default TEST_LOOPS are
 *                          provided here; TEST_SOURCES and TEST_LEN are not
 *                          defined in that branch and must come from the build
 *
 * TEST_LEN(m) is the per-buffer length for m buffers, rounded down to a
 * multiple of 64.  Macro arguments and GT_L3_CACHE are fully parenthesized
 * so that expression arguments such as TEST_LEN(k + p) expand correctly.
 */
// Uncomment to select the cached ("_warm") configuration
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES 32
# define TEST_LEN(m)  ((128*1024 / (m)) & ~(64-1))
# define TEST_LOOPS(m)   (100*(m))
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES 32
#  define GT_L3_CACHE  (32*1024*1024)	/* some number > last level cache */
#  define TEST_LEN(m)  ((GT_L3_CACHE / (m)) & ~(64-1))
#  define TEST_LOOPS(m)   (10)
#  define TEST_TYPE_STR "_cold"
# else
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#    define TEST_LOOPS(m) (1000)
#  endif
# endif
#endif

/* Upper bounds used to size the matrices and buffer tables. */
#define MMAX TEST_SOURCES
#define KMAX TEST_SOURCES

typedef unsigned char u8;
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j, rtest, m, k, nerrs, r;
|
||||||
|
void *buf;
|
||||||
|
u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
|
||||||
|
u8 a[MMAX * KMAX], b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
|
||||||
|
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||||
|
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||||
|
struct perf start, stop;
|
||||||
|
|
||||||
|
// Pick test parameters
|
||||||
|
m = 14;
|
||||||
|
k = 10;
|
||||||
|
nerrs = 4;
|
||||||
|
const u8 err_list[] = {2, 4, 5, 7};
|
||||||
|
|
||||||
|
printf("erasure_code_base_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
|
||||||
|
|
||||||
|
if (m > MMAX || k > KMAX || nerrs > (m - k)){
|
||||||
|
printf(" Input test parameter error\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(src_err_list, err_list, nerrs);
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
for (i = 0; i < nerrs; i++)
|
||||||
|
src_in_err[src_err_list[i]] = 1;
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < m; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||||
|
printf("alloc error: Fail\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < (m - k); i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||||
|
printf("alloc error: Fail\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
temp_buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN(m); j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
gf_gen_rs_matrix(a, m, k);
|
||||||
|
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||||
|
ec_encode_data_base(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
|
|
||||||
|
// Start encode test
|
||||||
|
perf_start(&start);
|
||||||
|
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
|
||||||
|
// Make parity vects
|
||||||
|
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||||
|
ec_encode_data_base(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf("erasure_code_base_encode" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)(TEST_LEN(m)) * (m) * rtest);
|
||||||
|
|
||||||
|
// Start decode test
|
||||||
|
perf_start(&start);
|
||||||
|
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
|
||||||
|
// Construct b by removing error rows
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r])
|
||||||
|
r++;
|
||||||
|
recov[i] = buffs[r];
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
b[k * i + j] = a[k * r + j];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gf_invert_matrix(b, d, k) < 0) {
|
||||||
|
printf("BAD MATRIX\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++)
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
c[k * i + j] = d[k * src_err_list[i] + j];
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, c, g_tbls);
|
||||||
|
ec_encode_data_base(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("erasure_code_base_decode" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)(TEST_LEN(m)) * (k + nerrs) * rtest);
|
||||||
|
|
||||||
|
printf("done all: Pass\n");
|
||||||
|
return 0;
|
||||||
|
}
|
764
erasure/src/erasure-code-base-test.c
Normal file
764
erasure/src/erasure-code-base-test.c
Normal file
@ -0,0 +1,764 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/types.h"
|
||||||
|
|
||||||
|
/* Size in bytes of every test buffer, and half of that size. */
#define TEST_LEN 8192
#define TEST_SIZE (TEST_LEN/2)

/* Number of buffers allocated; may be overridden from the build. */
#if !defined(TEST_SOURCES)
# define TEST_SOURCES 127
#endif

/* Number of iterations of the random-parameter tests; may be overridden. */
#if !defined(RANDOMS)
# define RANDOMS 50
#endif

/* Upper bounds on m and k; they size the matrix allocations. */
#define MMAX TEST_SOURCES
#define KMAX TEST_SOURCES

/* NOTE(review): its use is not visible in this part of the file - confirm. */
#define EFENCE_TEST_MIN_SIZE 16

// Define power of 2 range to check ptr, len alignment
#if defined(EC_ALIGNED_ADDR)
# define PTR_ALIGN_CHK_B 0
# define LEN_ALIGN_CHK_B 0	// 0 for aligned only
#else
# define PTR_ALIGN_CHK_B 32
# define LEN_ALIGN_CHK_B 32	// 0 for aligned only
#endif

/* Seed handed to srand(); may be overridden from the build. */
#if !defined(TEST_SEED)
# define TEST_SEED 11
#endif

typedef unsigned char u8;
|
||||||
|
|
||||||
|
/*
 * Print the first `len` bytes of `buf` to stdout as hex values, each
 * preceded by a space, with a line break after every 32nd byte and a
 * final line break at the end.
 */
void dump(unsigned char *buf, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++) {
		printf(" %2x", buf[pos] & 0xff);
		/* pos + 1 bytes have been printed so far */
		if ((pos + 1) % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print the first `m` bytes of each of the `k` buffers in `s` to stdout,
 * one buffer per line, as space-separated hex values, followed by an
 * empty line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print a flat byte array as a table of `k` lines with `m` hex values
 * each; entry (row, col) is read from s[row * m + col].  An empty line
 * follows the table.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[(row * m) + col] & 0xff);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
// Generate Random errors
|
||||||
|
static void gen_err_list(unsigned char *src_err_list,
|
||||||
|
unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m)
|
||||||
|
{
|
||||||
|
int i, err;
|
||||||
|
int nerrs = 0, nsrcerrs = 0;
|
||||||
|
|
||||||
|
for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) {
|
||||||
|
err = 1 & rand();
|
||||||
|
src_in_err[i] = err;
|
||||||
|
if (err) {
|
||||||
|
src_err_list[nerrs++] = i;
|
||||||
|
if (i < k) {
|
||||||
|
nsrcerrs++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (nerrs == 0) { // should have at least one error
|
||||||
|
while ((err = (rand() % KMAX)) >= m) ;
|
||||||
|
src_err_list[nerrs++] = err;
|
||||||
|
src_in_err[err] = 1;
|
||||||
|
if (err < k)
|
||||||
|
nsrcerrs = 1;
|
||||||
|
}
|
||||||
|
*pnerrs = nerrs;
|
||||||
|
*pnsrcerrs = nsrcerrs;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define NO_INVERT_MATRIX -2
|
||||||
|
// Generate decode matrix from encode matrix
|
||||||
|
static int gf_gen_decode_matrix(unsigned char *encode_matrix,
|
||||||
|
unsigned char *decode_matrix,
|
||||||
|
unsigned char *invert_matrix,
|
||||||
|
unsigned int *decode_index,
|
||||||
|
unsigned char *src_err_list,
|
||||||
|
unsigned char *src_in_err,
|
||||||
|
int nerrs, int nsrcerrs, int k, int m)
|
||||||
|
{
|
||||||
|
int i, j, p;
|
||||||
|
int r;
|
||||||
|
unsigned char *backup, *b, s;
|
||||||
|
int incr = 0;
|
||||||
|
|
||||||
|
b = malloc(MMAX * KMAX);
|
||||||
|
backup = malloc(MMAX * KMAX);
|
||||||
|
|
||||||
|
if (b == NULL || backup == NULL) {
|
||||||
|
printf("Test failure! Error with malloc\n");
|
||||||
|
free(b);
|
||||||
|
free(backup);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Construct matrix b by removing error rows
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r])
|
||||||
|
r++;
|
||||||
|
for (j = 0; j < k; j++) {
|
||||||
|
b[k * i + j] = encode_matrix[k * r + j];
|
||||||
|
backup[k * i + j] = encode_matrix[k * r + j];
|
||||||
|
}
|
||||||
|
decode_index[i] = r;
|
||||||
|
}
|
||||||
|
incr = 0;
|
||||||
|
while (gf_invert_matrix(b, invert_matrix, k) < 0) {
|
||||||
|
if (nerrs == (m - k)) {
|
||||||
|
free(b);
|
||||||
|
free(backup);
|
||||||
|
printf("BAD MATRIX\n");
|
||||||
|
return NO_INVERT_MATRIX;
|
||||||
|
}
|
||||||
|
incr++;
|
||||||
|
memcpy(b, backup, MMAX * KMAX);
|
||||||
|
for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) {
|
||||||
|
if (src_err_list[i] == (decode_index[k - 1] + incr)) {
|
||||||
|
// skip the erased parity line
|
||||||
|
incr++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (decode_index[k - 1] + incr >= m) {
|
||||||
|
free(b);
|
||||||
|
free(backup);
|
||||||
|
printf("BAD MATRIX\n");
|
||||||
|
return NO_INVERT_MATRIX;
|
||||||
|
}
|
||||||
|
decode_index[k - 1] += incr;
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j];
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
for (i = 0; i < nsrcerrs; i++) {
|
||||||
|
for (j = 0; j < k; j++) {
|
||||||
|
decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* src_err_list from encode_matrix * invert of b for parity decoding */
|
||||||
|
for (p = nsrcerrs; p < nerrs; p++) {
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
s = 0;
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
s ^= gf_mul(invert_matrix[j * k + i],
|
||||||
|
encode_matrix[k * src_err_list[p] + j]);
|
||||||
|
|
||||||
|
decode_matrix[k * p + i] = s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(b);
|
||||||
|
free(backup);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int re = 0;
|
||||||
|
int i, j, p, rtest, m, k;
|
||||||
|
int nerrs, nsrcerrs;
|
||||||
|
void *buf;
|
||||||
|
unsigned int decode_index[MMAX];
|
||||||
|
unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
|
||||||
|
unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls;
|
||||||
|
unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES];
|
||||||
|
unsigned char *recov[TEST_SOURCES];
|
||||||
|
|
||||||
|
int rows, align, size;
|
||||||
|
unsigned char *efence_buffs[TEST_SOURCES];
|
||||||
|
unsigned int offset;
|
||||||
|
u8 *ubuffs[TEST_SOURCES];
|
||||||
|
u8 *temp_ubuffs[TEST_SOURCES];
|
||||||
|
|
||||||
|
printf("erasure_code_base_test: %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||||
|
srand(TEST_SEED);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
temp_buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test erasure code by encode and recovery
|
||||||
|
|
||||||
|
encode_matrix = malloc(MMAX * KMAX);
|
||||||
|
decode_matrix = malloc(MMAX * KMAX);
|
||||||
|
invert_matrix = malloc(MMAX * KMAX);
|
||||||
|
g_tbls = malloc(KMAX * TEST_SOURCES * 32);
|
||||||
|
if (encode_matrix == NULL || decode_matrix == NULL
|
||||||
|
|| invert_matrix == NULL || g_tbls == NULL) {
|
||||||
|
printf("Test failure! Error with malloc\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pick a first test
|
||||||
|
m = 9;
|
||||||
|
k = 5;
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// Generate encode matrix encode_matrix
|
||||||
|
// The matrix generated by gf_gen_rs_matrix
|
||||||
|
// is not always invertable.
|
||||||
|
gf_gen_rs_matrix(encode_matrix, m, k);
|
||||||
|
|
||||||
|
// Generate g_tbls from encode matrix encode_matrix
|
||||||
|
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||||
|
|
||||||
|
// Perform matrix dot_prod for EC encoding
|
||||||
|
// using g_tbls from encode matrix encode_matrix
|
||||||
|
ec_encode_data_base(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
|
|
||||||
|
// Choose random buffers to be in erasure
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||||
|
|
||||||
|
// Generate decode matrix
|
||||||
|
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||||
|
invert_matrix, decode_index, src_err_list, src_in_err,
|
||||||
|
nerrs, nsrcerrs, k, m);
|
||||||
|
if (re != 0) {
|
||||||
|
printf("Fail to gf_gen_decode_matrix\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pack recovery array as list of valid sources
|
||||||
|
// Its order must be the same as the order
|
||||||
|
// to generate matrix b in gf_gen_decode_matrix
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
recov[i] = buffs[decode_index[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||||
|
ec_encode_data_base(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (j = 0; j < nerrs; j++)
|
||||||
|
printf(" %d", src_err_list[j]);
|
||||||
|
printf(" - Index = ");
|
||||||
|
for (p = 0; p < k; p++)
|
||||||
|
printf(" %d", decode_index[p]);
|
||||||
|
printf("\nencode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||||
|
printf("\ndecode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buffs[k + i], 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pick a first test
|
||||||
|
m = 9;
|
||||||
|
k = 5;
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// The matrix generated by gf_gen_cauchy1_matrix
|
||||||
|
// is always invertable.
|
||||||
|
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||||
|
|
||||||
|
// Generate g_tbls from encode matrix encode_matrix
|
||||||
|
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||||
|
|
||||||
|
// Perform matrix dot_prod for EC encoding
|
||||||
|
// using g_tbls from encode matrix encode_matrix
|
||||||
|
ec_encode_data_base(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
|
|
||||||
|
// Choose random buffers to be in erasure
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||||
|
|
||||||
|
// Generate decode matrix
|
||||||
|
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||||
|
invert_matrix, decode_index, src_err_list, src_in_err,
|
||||||
|
nerrs, nsrcerrs, k, m);
|
||||||
|
if (re != 0) {
|
||||||
|
printf("Fail to gf_gen_decode_matrix\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pack recovery array as list of valid sources
|
||||||
|
// Its order must be the same as the order
|
||||||
|
// to generate matrix b in gf_gen_decode_matrix
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
recov[i] = buffs[decode_index[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||||
|
ec_encode_data_base(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (j = 0; j < nerrs; j++)
|
||||||
|
printf(" %d", src_err_list[j]);
|
||||||
|
printf(" - Index = ");
|
||||||
|
for (p = 0; p < k; p++)
|
||||||
|
printf(" %d", decode_index[p]);
|
||||||
|
printf("\nencode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||||
|
printf("\ndecode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buffs[k + i], 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do more random tests
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
while ((m = (rand() % MMAX)) < 2) ;
|
||||||
|
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||||
|
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// The matrix generated by gf_gen_cauchy1_matrix
|
||||||
|
// is always invertable.
|
||||||
|
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
// Generate g_tbls from encode matrix a
|
||||||
|
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||||
|
// Perform matrix dot_prod for EC encoding
|
||||||
|
// using g_tbls from encode matrix a
|
||||||
|
ec_encode_data_base(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
|
|
||||||
|
// Random errors
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||||
|
|
||||||
|
// Generate decode matrix
|
||||||
|
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||||
|
invert_matrix, decode_index, src_err_list,
|
||||||
|
src_in_err, nerrs, nsrcerrs, k, m);
|
||||||
|
if (re != 0) {
|
||||||
|
printf("Fail to gf_gen_decode_matrix\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pack recovery array as list of valid sources
|
||||||
|
// Its order must be the same as the order
|
||||||
|
// to generate matrix b in gf_gen_decode_matrix
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
recov[i] = buffs[decode_index[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||||
|
ec_encode_data_base(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (j = 0; j < nerrs; j++)
|
||||||
|
printf(" %d", src_err_list[j]);
|
||||||
|
printf(" - Index = ");
|
||||||
|
for (p = 0; p < k; p++)
|
||||||
|
printf(" %d", decode_index[p]);
|
||||||
|
printf("\nencode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||||
|
printf("\ndecode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||||
|
printf("orig data:\n");
|
||||||
|
dump_matrix(buffs, m, 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buffs[k + i], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run tests at end of buffer for Electric Fence
|
||||||
|
k = 16;
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
|
if (k > KMAX)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
for (rows = 1; rows <= 16; rows++) {
|
||||||
|
m = k + rows;
|
||||||
|
if (m > MMAX)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (size = EFENCE_TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||||
|
for (i = 0; i < m; i++) { // Line up TEST_SIZE from end
|
||||||
|
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The matrix generated by gf_gen_cauchy1_matrix
|
||||||
|
// is always invertable.
|
||||||
|
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
// Generate g_tbls from encode matrix a
|
||||||
|
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||||
|
// Perform matrix dot_prod for EC encoding
|
||||||
|
// using g_tbls from encode matrix a
|
||||||
|
ec_encode_data_base(size, k, m - k, g_tbls, efence_buffs,
|
||||||
|
&efence_buffs[k]);
|
||||||
|
|
||||||
|
// Random errors
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||||
|
|
||||||
|
// Generate decode matrix
|
||||||
|
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||||
|
invert_matrix, decode_index, src_err_list,
|
||||||
|
src_in_err, nerrs, nsrcerrs, k, m);
|
||||||
|
if (re != 0) {
|
||||||
|
printf("Fail to gf_gen_decode_matrix\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pack recovery array as list of valid sources
|
||||||
|
// Its order must be the same as the order
|
||||||
|
// to generate matrix b in gf_gen_decode_matrix
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
recov[i] = efence_buffs[decode_index[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||||
|
ec_encode_data_base(size, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
if (0 !=
|
||||||
|
memcmp(temp_buffs[k + i], efence_buffs[src_err_list[i]],
|
||||||
|
size)) {
|
||||||
|
printf("Efence: Fail error recovery (%d, %d, %d)\n", m,
|
||||||
|
k, nerrs);
|
||||||
|
|
||||||
|
printf("size = %d\n", size);
|
||||||
|
|
||||||
|
printf("Test erase list = ");
|
||||||
|
for (j = 0; j < nerrs; j++)
|
||||||
|
printf(" %d", src_err_list[j]);
|
||||||
|
printf(" - Index = ");
|
||||||
|
for (p = 0; p < k; p++)
|
||||||
|
printf(" %d", decode_index[p]);
|
||||||
|
printf("\nencode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||||
|
printf("\ndecode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||||
|
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buffs[k + i], align);
|
||||||
|
printf("orig :");
|
||||||
|
dump(efence_buffs[src_err_list[i]], align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test rand ptr alignment if available
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
while ((m = (rand() % MMAX)) < 2) ;
|
||||||
|
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||||
|
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15;
|
||||||
|
|
||||||
|
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||||
|
// Add random offsets
|
||||||
|
for (i = 0; i < m; i++) {
|
||||||
|
memset(buffs[i], 0, TEST_LEN); // zero pad to check write-over
|
||||||
|
memset(temp_buffs[i], 0, TEST_LEN); // zero pad to check write-over
|
||||||
|
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
temp_ubuffs[i] = temp_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
ubuffs[i][j] = rand();
|
||||||
|
|
||||||
|
// The matrix generated by gf_gen_cauchy1_matrix
|
||||||
|
// is always invertable.
|
||||||
|
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
// Generate g_tbls from encode matrix a
|
||||||
|
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||||
|
// Perform matrix dot_prod for EC encoding
|
||||||
|
// using g_tbls from encode matrix a
|
||||||
|
ec_encode_data_base(size, k, m - k, g_tbls, ubuffs, &ubuffs[k]);
|
||||||
|
|
||||||
|
// Random errors
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||||
|
|
||||||
|
// Generate decode matrix
|
||||||
|
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||||
|
invert_matrix, decode_index, src_err_list,
|
||||||
|
src_in_err, nerrs, nsrcerrs, k, m);
|
||||||
|
if (re != 0) {
|
||||||
|
printf("Fail to gf_gen_decode_matrix\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pack recovery array as list of valid sources
|
||||||
|
// Its order must be the same as the order
|
||||||
|
// to generate matrix b in gf_gen_decode_matrix
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
recov[i] = ubuffs[decode_index[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||||
|
ec_encode_data_base(size, k, nerrs, g_tbls, recov, &temp_ubuffs[k]);
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_ubuffs[k + i], ubuffs[src_err_list[i]], size)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (j = 0; j < nerrs; j++)
|
||||||
|
printf(" %d", src_err_list[j]);
|
||||||
|
printf(" - Index = ");
|
||||||
|
for (p = 0; p < k; p++)
|
||||||
|
printf(" %d", decode_index[p]);
|
||||||
|
printf("\nencode_matrix:\n");
|
||||||
|
dump_u8xu8((unsigned char *)encode_matrix, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((unsigned char *)invert_matrix, k, k);
|
||||||
|
printf("\ndecode_matrix:\n");
|
||||||
|
dump_u8xu8((unsigned char *)decode_matrix, m, k);
|
||||||
|
printf("orig data:\n");
|
||||||
|
dump_matrix(ubuffs, m, 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(ubuffs[src_err_list[i]], 25);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_ubuffs[k + i], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Confirm that padding around dests is unchanged
|
||||||
|
memset(temp_buffs[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||||
|
|
||||||
|
for (i = 0; i < m; i++) {
|
||||||
|
|
||||||
|
offset = ubuffs[i] - buffs[i];
|
||||||
|
|
||||||
|
if (memcmp(buffs[i], temp_buffs[0], offset)) {
|
||||||
|
printf("Fail rand ualign encode pad start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp
|
||||||
|
(buffs[i] + offset + size, temp_buffs[0],
|
||||||
|
PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign encode pad end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
offset = temp_ubuffs[k + i] - temp_buffs[k + i];
|
||||||
|
if (memcmp(temp_buffs[k + i], temp_buffs[0], offset)) {
|
||||||
|
printf("Fail rand ualign decode pad start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp
|
||||||
|
(temp_buffs[k + i] + offset + size, temp_buffs[0],
|
||||||
|
PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign decode pad end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test size alignment
|
||||||
|
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 13 : 16;
|
||||||
|
|
||||||
|
for (size = TEST_LEN; size > 0; size -= align) {
|
||||||
|
while ((m = (rand() % MMAX)) < 2) ;
|
||||||
|
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||||
|
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// The matrix generated by gf_gen_cauchy1_matrix
|
||||||
|
// is always invertable.
|
||||||
|
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
// Generate g_tbls from encode matrix a
|
||||||
|
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||||
|
// Perform matrix dot_prod for EC encoding
|
||||||
|
// using g_tbls from encode matrix a
|
||||||
|
ec_encode_data_base(size, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
|
|
||||||
|
// Random errors
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||||
|
// Generate decode matrix
|
||||||
|
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||||
|
invert_matrix, decode_index, src_err_list,
|
||||||
|
src_in_err, nerrs, nsrcerrs, k, m);
|
||||||
|
if (re != 0) {
|
||||||
|
printf("Fail to gf_gen_decode_matrix\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pack recovery array as list of valid sources
|
||||||
|
// Its order must be the same as the order
|
||||||
|
// to generate matrix b in gf_gen_decode_matrix
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
recov[i] = buffs[decode_index[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||||
|
ec_encode_data_base(size, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], size)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (j = 0; j < nerrs; j++)
|
||||||
|
printf(" %d", src_err_list[j]);
|
||||||
|
printf(" - Index = ");
|
||||||
|
for (p = 0; p < k; p++)
|
||||||
|
printf(" %d", decode_index[p]);
|
||||||
|
printf("\nencode_matrix:\n");
|
||||||
|
dump_u8xu8((unsigned char *)encode_matrix, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((unsigned char *)invert_matrix, k, k);
|
||||||
|
printf("\ndecode_matrix:\n");
|
||||||
|
dump_u8xu8((unsigned char *)decode_matrix, m, k);
|
||||||
|
printf("orig data:\n");
|
||||||
|
dump_matrix(buffs, m, 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buffs[k + i], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("done EC tests: Pass\n");
|
||||||
|
return 0;
|
||||||
|
}
|
168
erasure/src/erasure-code-perf.c
Normal file
168
erasure/src/erasure-code-perf.c
Normal file
@ -0,0 +1,168 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/tests.h"
|
||||||
|
|
||||||
|
// Benchmark sizing configuration.
//
//  - CACHED_TEST defined:  small working set, many loops ("_warm").
//  - default:              working set larger than GT_L3_CACHE, few loops ("_cold").
//  - TEST_CUSTOM defined:  only TEST_TYPE_STR and a fallback TEST_LOOPS are
//                          provided here; TEST_SOURCES and TEST_LEN are not
//                          defined by this block in that configuration.
//
// Every macro argument and every expression-valued expansion is parenthesized
// so that calls such as TEST_LEN(a + b) or x / GT_L3_CACHE expand with the
// intended precedence.
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES 32
# define TEST_LEN(m)  (((128*1024) / (m)) & ~(64-1))	// per-buffer length, multiple of 64
# define TEST_LOOPS(m)   (10000*(m))
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES 32
#  define GT_L3_CACHE  (32*1024*1024)	/* some number > last level cache */
#  define TEST_LEN(m)  ((GT_L3_CACHE / (m)) & ~(64-1))	// per-buffer length, multiple of 64
#  define TEST_LOOPS(m)   (50*(m))
#  define TEST_TYPE_STR "_cold"
# else
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#    define TEST_LOOPS(m) 1000
#  endif
# endif
#endif

// Upper bounds used to size the matrix and buffer-pointer arrays
#define MMAX TEST_SOURCES
#define KMAX TEST_SOURCES
|
||||||
|
|
||||||
|
typedef unsigned char u8;
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j, rtest, m, k, nerrs, r;
|
||||||
|
void *buf;
|
||||||
|
u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
|
||||||
|
u8 a[MMAX * KMAX], b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
|
||||||
|
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||||
|
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||||
|
struct perf start, stop;
|
||||||
|
|
||||||
|
// Pick test parameters
|
||||||
|
m = 14;
|
||||||
|
k = 10;
|
||||||
|
nerrs = 4;
|
||||||
|
const u8 err_list[] = {2, 4, 5, 7};
|
||||||
|
|
||||||
|
printf("erasure_code_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
|
||||||
|
|
||||||
|
if (m > MMAX || k > KMAX || nerrs > (m - k)){
|
||||||
|
printf(" Input test parameter error\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(src_err_list, err_list, nerrs);
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
for (i = 0; i < nerrs; i++)
|
||||||
|
src_in_err[src_err_list[i]] = 1;
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < m; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||||
|
printf("alloc error: Fail\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < (m - k); i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||||
|
printf("alloc error: Fail\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
temp_buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN(m); j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
gf_gen_rs_matrix(a, m, k);
|
||||||
|
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||||
|
ec_encode_data(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
|
|
||||||
|
// Start encode test
|
||||||
|
perf_start(&start);
|
||||||
|
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
|
||||||
|
// Make parity vects
|
||||||
|
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||||
|
ec_encode_data(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf("erasure_code_encode" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)(TEST_LEN(m)) * (m) * rtest);
|
||||||
|
|
||||||
|
// Start decode test
|
||||||
|
perf_start(&start);
|
||||||
|
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
|
||||||
|
// Construct b by removing error rows
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r])
|
||||||
|
r++;
|
||||||
|
recov[i] = buffs[r];
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
b[k * i + j] = a[k * r + j];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gf_invert_matrix(b, d, k) < 0) {
|
||||||
|
printf("BAD MATRIX\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++)
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
c[k * i + j] = d[k * src_err_list[i] + j];
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, c, g_tbls);
|
||||||
|
ec_encode_data(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("erasure_code_decode" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)(TEST_LEN(m)) * (k + nerrs) * rtest);
|
||||||
|
|
||||||
|
printf("done all: Pass\n");
|
||||||
|
return 0;
|
||||||
|
}
|
168
erasure/src/erasure-code-sse-perf.c
Normal file
168
erasure/src/erasure-code-sse-perf.c
Normal file
@ -0,0 +1,168 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/tests.h"
|
||||||
|
|
||||||
|
// Benchmark sizing configuration.
//
//  - CACHED_TEST defined:  small working set, many loops ("_warm").
//  - default:              working set larger than GT_L3_CACHE, few loops ("_cold").
//  - TEST_CUSTOM defined:  only TEST_TYPE_STR and a fallback TEST_LOOPS are
//                          provided here; TEST_SOURCES and TEST_LEN are not
//                          defined by this block in that configuration.
//
// Every macro argument and every expression-valued expansion is parenthesized
// so that calls such as TEST_LEN(a + b) or x / GT_L3_CACHE expand with the
// intended precedence.
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES 32
# define TEST_LEN(m)  (((128*1024) / (m)) & ~(64-1))	// per-buffer length, multiple of 64
# define TEST_LOOPS(m)   (10000*(m))
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES 32
#  define GT_L3_CACHE  (32*1024*1024)	/* some number > last level cache */
#  define TEST_LEN(m)  ((GT_L3_CACHE / (m)) & ~(64-1))	// per-buffer length, multiple of 64
#  define TEST_LOOPS(m)   (50*(m))
#  define TEST_TYPE_STR "_cold"
# else
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#    define TEST_LOOPS(m) 1000
#  endif
# endif
#endif

// Upper bounds used to size the matrix and buffer-pointer arrays
#define MMAX TEST_SOURCES
#define KMAX TEST_SOURCES
|
||||||
|
|
||||||
|
typedef unsigned char u8;
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j, rtest, m, k, nerrs, r;
|
||||||
|
void *buf;
|
||||||
|
u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
|
||||||
|
u8 a[MMAX * KMAX], b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX];
|
||||||
|
u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||||
|
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||||
|
struct perf start, stop;
|
||||||
|
|
||||||
|
// Pick test parameters
|
||||||
|
m = 14;
|
||||||
|
k = 10;
|
||||||
|
nerrs = 4;
|
||||||
|
const u8 err_list[] = {2, 4, 5, 7};
|
||||||
|
|
||||||
|
printf("erasure_code_sse_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs);
|
||||||
|
|
||||||
|
if (m > MMAX || k > KMAX || nerrs > (m - k)){
|
||||||
|
printf(" Input test parameter error\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(src_err_list, err_list, nerrs);
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
for (i = 0; i < nerrs; i++)
|
||||||
|
src_in_err[src_err_list[i]] = 1;
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < m; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||||
|
printf("alloc error: Fail\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < (m - k); i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN(m))) {
|
||||||
|
printf("alloc error: Fail\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
temp_buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN(m); j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
gf_gen_rs_matrix(a, m, k);
|
||||||
|
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||||
|
ec_encode_data_sse(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
|
|
||||||
|
// Start encode test
|
||||||
|
perf_start(&start);
|
||||||
|
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
|
||||||
|
// Make parity vects
|
||||||
|
ec_init_tables(k, m - k, &a[k * k], g_tbls);
|
||||||
|
ec_encode_data_sse(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf("erasure_code_sse_encode" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)(TEST_LEN(m)) * (m) * rtest);
|
||||||
|
|
||||||
|
// Start decode test
|
||||||
|
perf_start(&start);
|
||||||
|
for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) {
|
||||||
|
// Construct b by removing error rows
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r])
|
||||||
|
r++;
|
||||||
|
recov[i] = buffs[r];
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
b[k * i + j] = a[k * r + j];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gf_invert_matrix(b, d, k) < 0) {
|
||||||
|
printf("BAD MATRIX\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++)
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
c[k * i + j] = d[k * src_err_list[i] + j];
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, c, g_tbls);
|
||||||
|
ec_encode_data_sse(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("erasure_code_sse_decode" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)(TEST_LEN(m)) * (k + nerrs) * rtest);
|
||||||
|
|
||||||
|
printf("done all: Pass\n");
|
||||||
|
return 0;
|
||||||
|
}
|
764
erasure/src/erasure-code-sse-test.c
Normal file
764
erasure/src/erasure-code-sse-test.c
Normal file
@ -0,0 +1,764 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/types.h"
|
||||||
|
|
||||||
|
#define TEST_LEN 8192
|
||||||
|
#define TEST_SIZE (TEST_LEN/2)
|
||||||
|
|
||||||
|
#ifndef TEST_SOURCES
|
||||||
|
# define TEST_SOURCES 127
|
||||||
|
#endif
|
||||||
|
#ifndef RANDOMS
|
||||||
|
# define RANDOMS 200
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define MMAX TEST_SOURCES
|
||||||
|
#define KMAX TEST_SOURCES
|
||||||
|
|
||||||
|
#define EFENCE_TEST_MIN_SIZE 16
|
||||||
|
|
||||||
|
#ifdef EC_ALIGNED_ADDR
|
||||||
|
// Define power of 2 range to check ptr, len alignment
|
||||||
|
# define PTR_ALIGN_CHK_B 0
|
||||||
|
# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
|
||||||
|
#else
|
||||||
|
// Define power of 2 range to check ptr, len alignment
|
||||||
|
# define PTR_ALIGN_CHK_B 32
|
||||||
|
# define LEN_ALIGN_CHK_B 32 // 0 for aligned only
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef TEST_SEED
|
||||||
|
#define TEST_SEED 11
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef unsigned char u8;
|
||||||
|
|
||||||
|
/*
 * Print the first len bytes of buf to stdout as space-separated hex
 * values, starting a new line after every 32 bytes and ending with a
 * final newline.
 */
void dump(unsigned char *buf, int len)
{
	int pos = 0;

	while (pos < len) {
		printf(" %2x", 0xff & buf[pos]);
		pos++;
		// wrap once a full row of 32 values has been written
		if (pos % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print the first m bytes of each of the k buffers in s to stdout, one
 * buffer per line in hex, followed by a blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col;

		for (col = 0; col < m; col++)
			printf(" %2x", s[row][col]);
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print a k-row by m-column byte matrix stored contiguously in row-major
 * order at s: one row per line in hex, followed by a blank line.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row, col;
	int idx = 0;		// running row-major offset, equals row * m + col when m > 0

	for (row = 0; row < k; row++) {
		for (col = 0; col < m; col++) {
			printf(" %2x", 0xff & s[idx]);
			idx++;
		}
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
// Generate Random errors
|
||||||
|
static void gen_err_list(unsigned char *src_err_list,
|
||||||
|
unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m)
|
||||||
|
{
|
||||||
|
int i, err;
|
||||||
|
int nerrs = 0, nsrcerrs = 0;
|
||||||
|
|
||||||
|
for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) {
|
||||||
|
err = 1 & rand();
|
||||||
|
src_in_err[i] = err;
|
||||||
|
if (err) {
|
||||||
|
src_err_list[nerrs++] = i;
|
||||||
|
if (i < k) {
|
||||||
|
nsrcerrs++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (nerrs == 0) { // should have at least one error
|
||||||
|
while ((err = (rand() % KMAX)) >= m) ;
|
||||||
|
src_err_list[nerrs++] = err;
|
||||||
|
src_in_err[err] = 1;
|
||||||
|
if (err < k)
|
||||||
|
nsrcerrs = 1;
|
||||||
|
}
|
||||||
|
*pnerrs = nerrs;
|
||||||
|
*pnsrcerrs = nsrcerrs;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define NO_INVERT_MATRIX -2
|
||||||
|
// Generate decode matrix from encode matrix
|
||||||
|
static int gf_gen_decode_matrix(unsigned char *encode_matrix,
|
||||||
|
unsigned char *decode_matrix,
|
||||||
|
unsigned char *invert_matrix,
|
||||||
|
unsigned int *decode_index,
|
||||||
|
unsigned char *src_err_list,
|
||||||
|
unsigned char *src_in_err,
|
||||||
|
int nerrs, int nsrcerrs, int k, int m)
|
||||||
|
{
|
||||||
|
int i, j, p;
|
||||||
|
int r;
|
||||||
|
unsigned char *backup, *b, s;
|
||||||
|
int incr = 0;
|
||||||
|
|
||||||
|
b = malloc(MMAX * KMAX);
|
||||||
|
backup = malloc(MMAX * KMAX);
|
||||||
|
|
||||||
|
if (b == NULL || backup == NULL) {
|
||||||
|
printf("Test failure! Error with malloc\n");
|
||||||
|
free(b);
|
||||||
|
free(backup);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Construct matrix b by removing error rows
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r])
|
||||||
|
r++;
|
||||||
|
for (j = 0; j < k; j++) {
|
||||||
|
b[k * i + j] = encode_matrix[k * r + j];
|
||||||
|
backup[k * i + j] = encode_matrix[k * r + j];
|
||||||
|
}
|
||||||
|
decode_index[i] = r;
|
||||||
|
}
|
||||||
|
incr = 0;
|
||||||
|
while (gf_invert_matrix(b, invert_matrix, k) < 0) {
|
||||||
|
if (nerrs == (m - k)) {
|
||||||
|
free(b);
|
||||||
|
free(backup);
|
||||||
|
printf("BAD MATRIX\n");
|
||||||
|
return NO_INVERT_MATRIX;
|
||||||
|
}
|
||||||
|
incr++;
|
||||||
|
memcpy(b, backup, MMAX * KMAX);
|
||||||
|
for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) {
|
||||||
|
if (src_err_list[i] == (decode_index[k - 1] + incr)) {
|
||||||
|
// skip the erased parity line
|
||||||
|
incr++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (decode_index[k - 1] + incr >= m) {
|
||||||
|
free(b);
|
||||||
|
free(backup);
|
||||||
|
printf("BAD MATRIX\n");
|
||||||
|
return NO_INVERT_MATRIX;
|
||||||
|
}
|
||||||
|
decode_index[k - 1] += incr;
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j];
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
for (i = 0; i < nsrcerrs; i++) {
|
||||||
|
for (j = 0; j < k; j++) {
|
||||||
|
decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* src_err_list from encode_matrix * invert of b for parity decoding */
|
||||||
|
for (p = nsrcerrs; p < nerrs; p++) {
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
s = 0;
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
s ^= gf_mul(invert_matrix[j * k + i],
|
||||||
|
encode_matrix[k * src_err_list[p] + j]);
|
||||||
|
|
||||||
|
decode_matrix[k * p + i] = s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(b);
|
||||||
|
free(backup);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int re = 0;
|
||||||
|
int i, j, p, rtest, m, k;
|
||||||
|
int nerrs, nsrcerrs;
|
||||||
|
void *buf;
|
||||||
|
unsigned int decode_index[MMAX];
|
||||||
|
unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
|
||||||
|
unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls;
|
||||||
|
unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES];
|
||||||
|
unsigned char *recov[TEST_SOURCES];
|
||||||
|
|
||||||
|
int rows, align, size;
|
||||||
|
unsigned char *efence_buffs[TEST_SOURCES];
|
||||||
|
unsigned int offset;
|
||||||
|
u8 *ubuffs[TEST_SOURCES];
|
||||||
|
u8 *temp_ubuffs[TEST_SOURCES];
|
||||||
|
|
||||||
|
printf("erasure_code_sse_test: %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||||
|
srand(TEST_SEED);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
temp_buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test erasure code by encode and recovery
|
||||||
|
|
||||||
|
encode_matrix = malloc(MMAX * KMAX);
|
||||||
|
decode_matrix = malloc(MMAX * KMAX);
|
||||||
|
invert_matrix = malloc(MMAX * KMAX);
|
||||||
|
g_tbls = malloc(KMAX * TEST_SOURCES * 32);
|
||||||
|
if (encode_matrix == NULL || decode_matrix == NULL
|
||||||
|
|| invert_matrix == NULL || g_tbls == NULL) {
|
||||||
|
printf("Test failure! Error with malloc\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pick a first test
|
||||||
|
m = 9;
|
||||||
|
k = 5;
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// Generate encode matrix encode_matrix
|
||||||
|
// The matrix generated by gf_gen_rs_matrix
|
||||||
|
// is not always invertable.
|
||||||
|
gf_gen_rs_matrix(encode_matrix, m, k);
|
||||||
|
|
||||||
|
// Generate g_tbls from encode matrix encode_matrix
|
||||||
|
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||||
|
|
||||||
|
// Perform matrix dot_prod for EC encoding
|
||||||
|
// using g_tbls from encode matrix encode_matrix
|
||||||
|
ec_encode_data_sse(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
|
|
||||||
|
// Choose random buffers to be in erasure
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||||
|
|
||||||
|
// Generate decode matrix
|
||||||
|
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||||
|
invert_matrix, decode_index, src_err_list, src_in_err,
|
||||||
|
nerrs, nsrcerrs, k, m);
|
||||||
|
if (re != 0) {
|
||||||
|
printf("Fail to gf_gen_decode_matrix\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pack recovery array as list of valid sources
|
||||||
|
// Its order must be the same as the order
|
||||||
|
// to generate matrix b in gf_gen_decode_matrix
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
recov[i] = buffs[decode_index[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||||
|
ec_encode_data_sse(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (j = 0; j < nerrs; j++)
|
||||||
|
printf(" %d", src_err_list[j]);
|
||||||
|
printf(" - Index = ");
|
||||||
|
for (p = 0; p < k; p++)
|
||||||
|
printf(" %d", decode_index[p]);
|
||||||
|
printf("\nencode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||||
|
printf("\ndecode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buffs[k + i], 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pick a first test
|
||||||
|
m = 9;
|
||||||
|
k = 5;
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// The matrix generated by gf_gen_cauchy1_matrix
|
||||||
|
// is always invertable.
|
||||||
|
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||||
|
|
||||||
|
// Generate g_tbls from encode matrix encode_matrix
|
||||||
|
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||||
|
|
||||||
|
// Perform matrix dot_prod for EC encoding
|
||||||
|
// using g_tbls from encode matrix encode_matrix
|
||||||
|
ec_encode_data_sse(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
|
|
||||||
|
// Choose random buffers to be in erasure
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||||
|
|
||||||
|
// Generate decode matrix
|
||||||
|
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||||
|
invert_matrix, decode_index, src_err_list, src_in_err,
|
||||||
|
nerrs, nsrcerrs, k, m);
|
||||||
|
if (re != 0) {
|
||||||
|
printf("Fail to gf_gen_decode_matrix\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pack recovery array as list of valid sources
|
||||||
|
// Its order must be the same as the order
|
||||||
|
// to generate matrix b in gf_gen_decode_matrix
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
recov[i] = buffs[decode_index[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||||
|
ec_encode_data_sse(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (j = 0; j < nerrs; j++)
|
||||||
|
printf(" %d", src_err_list[j]);
|
||||||
|
printf(" - Index = ");
|
||||||
|
for (p = 0; p < k; p++)
|
||||||
|
printf(" %d", decode_index[p]);
|
||||||
|
printf("\nencode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||||
|
printf("\ndecode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buffs[k + i], 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do more random tests
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
while ((m = (rand() % MMAX)) < 2) ;
|
||||||
|
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||||
|
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// The matrix generated by gf_gen_cauchy1_matrix
|
||||||
|
// is always invertable.
|
||||||
|
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
// Generate g_tbls from encode matrix a
|
||||||
|
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||||
|
// Perform matrix dot_prod for EC encoding
|
||||||
|
// using g_tbls from encode matrix a
|
||||||
|
ec_encode_data_sse(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
|
|
||||||
|
// Random errors
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||||
|
|
||||||
|
// Generate decode matrix
|
||||||
|
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||||
|
invert_matrix, decode_index, src_err_list,
|
||||||
|
src_in_err, nerrs, nsrcerrs, k, m);
|
||||||
|
if (re != 0) {
|
||||||
|
printf("Fail to gf_gen_decode_matrix\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pack recovery array as list of valid sources
|
||||||
|
// Its order must be the same as the order
|
||||||
|
// to generate matrix b in gf_gen_decode_matrix
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
recov[i] = buffs[decode_index[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||||
|
ec_encode_data_sse(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (j = 0; j < nerrs; j++)
|
||||||
|
printf(" %d", src_err_list[j]);
|
||||||
|
printf(" - Index = ");
|
||||||
|
for (p = 0; p < k; p++)
|
||||||
|
printf(" %d", decode_index[p]);
|
||||||
|
printf("\nencode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||||
|
printf("\ndecode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||||
|
printf("orig data:\n");
|
||||||
|
dump_matrix(buffs, m, 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buffs[k + i], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run tests at end of buffer for Electric Fence
|
||||||
|
k = 16;
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
|
if (k > KMAX)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
for (rows = 1; rows <= 16; rows++) {
|
||||||
|
m = k + rows;
|
||||||
|
if (m > MMAX)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (size = EFENCE_TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||||
|
for (i = 0; i < m; i++) { // Line up TEST_SIZE from end
|
||||||
|
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The matrix generated by gf_gen_cauchy1_matrix
|
||||||
|
// is always invertable.
|
||||||
|
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
// Generate g_tbls from encode matrix a
|
||||||
|
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||||
|
// Perform matrix dot_prod for EC encoding
|
||||||
|
// using g_tbls from encode matrix a
|
||||||
|
ec_encode_data_sse(size, k, m - k, g_tbls, efence_buffs,
|
||||||
|
&efence_buffs[k]);
|
||||||
|
|
||||||
|
// Random errors
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||||
|
|
||||||
|
// Generate decode matrix
|
||||||
|
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||||
|
invert_matrix, decode_index, src_err_list,
|
||||||
|
src_in_err, nerrs, nsrcerrs, k, m);
|
||||||
|
if (re != 0) {
|
||||||
|
printf("Fail to gf_gen_decode_matrix\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pack recovery array as list of valid sources
|
||||||
|
// Its order must be the same as the order
|
||||||
|
// to generate matrix b in gf_gen_decode_matrix
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
recov[i] = efence_buffs[decode_index[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||||
|
ec_encode_data_sse(size, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
if (0 !=
|
||||||
|
memcmp(temp_buffs[k + i], efence_buffs[src_err_list[i]],
|
||||||
|
size)) {
|
||||||
|
printf("Efence: Fail error recovery (%d, %d, %d)\n", m,
|
||||||
|
k, nerrs);
|
||||||
|
|
||||||
|
printf("size = %d\n", size);
|
||||||
|
|
||||||
|
printf("Test erase list = ");
|
||||||
|
for (j = 0; j < nerrs; j++)
|
||||||
|
printf(" %d", src_err_list[j]);
|
||||||
|
printf(" - Index = ");
|
||||||
|
for (p = 0; p < k; p++)
|
||||||
|
printf(" %d", decode_index[p]);
|
||||||
|
printf("\nencode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||||
|
printf("\ndecode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||||
|
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buffs[k + i], align);
|
||||||
|
printf("orig :");
|
||||||
|
dump(efence_buffs[src_err_list[i]], align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test rand ptr alignment if available
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
while ((m = (rand() % MMAX)) < 2) ;
|
||||||
|
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||||
|
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15;
|
||||||
|
|
||||||
|
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||||
|
// Add random offsets
|
||||||
|
for (i = 0; i < m; i++) {
|
||||||
|
memset(buffs[i], 0, TEST_LEN); // zero pad to check write-over
|
||||||
|
memset(temp_buffs[i], 0, TEST_LEN); // zero pad to check write-over
|
||||||
|
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
temp_ubuffs[i] = temp_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
ubuffs[i][j] = rand();
|
||||||
|
|
||||||
|
// The matrix generated by gf_gen_cauchy1_matrix
|
||||||
|
// is always invertable.
|
||||||
|
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
// Generate g_tbls from encode matrix a
|
||||||
|
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||||
|
// Perform matrix dot_prod for EC encoding
|
||||||
|
// using g_tbls from encode matrix a
|
||||||
|
ec_encode_data_sse(size, k, m - k, g_tbls, ubuffs, &ubuffs[k]);
|
||||||
|
|
||||||
|
// Random errors
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||||
|
|
||||||
|
// Generate decode matrix
|
||||||
|
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||||
|
invert_matrix, decode_index, src_err_list,
|
||||||
|
src_in_err, nerrs, nsrcerrs, k, m);
|
||||||
|
if (re != 0) {
|
||||||
|
printf("Fail to gf_gen_decode_matrix\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pack recovery array as list of valid sources
|
||||||
|
// Its order must be the same as the order
|
||||||
|
// to generate matrix b in gf_gen_decode_matrix
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
recov[i] = ubuffs[decode_index[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||||
|
ec_encode_data_sse(size, k, nerrs, g_tbls, recov, &temp_ubuffs[k]);
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_ubuffs[k + i], ubuffs[src_err_list[i]], size)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (j = 0; j < nerrs; j++)
|
||||||
|
printf(" %d", src_err_list[j]);
|
||||||
|
printf(" - Index = ");
|
||||||
|
for (p = 0; p < k; p++)
|
||||||
|
printf(" %d", decode_index[p]);
|
||||||
|
printf("\nencode_matrix:\n");
|
||||||
|
dump_u8xu8((unsigned char *)encode_matrix, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((unsigned char *)invert_matrix, k, k);
|
||||||
|
printf("\ndecode_matrix:\n");
|
||||||
|
dump_u8xu8((unsigned char *)decode_matrix, m, k);
|
||||||
|
printf("orig data:\n");
|
||||||
|
dump_matrix(ubuffs, m, 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(ubuffs[src_err_list[i]], 25);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_ubuffs[k + i], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Confirm that padding around dests is unchanged
|
||||||
|
memset(temp_buffs[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||||
|
|
||||||
|
for (i = 0; i < m; i++) {
|
||||||
|
|
||||||
|
offset = ubuffs[i] - buffs[i];
|
||||||
|
|
||||||
|
if (memcmp(buffs[i], temp_buffs[0], offset)) {
|
||||||
|
printf("Fail rand ualign encode pad start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp
|
||||||
|
(buffs[i] + offset + size, temp_buffs[0],
|
||||||
|
PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign encode pad end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
offset = temp_ubuffs[k + i] - temp_buffs[k + i];
|
||||||
|
if (memcmp(temp_buffs[k + i], temp_buffs[0], offset)) {
|
||||||
|
printf("Fail rand ualign decode pad start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp
|
||||||
|
(temp_buffs[k + i] + offset + size, temp_buffs[0],
|
||||||
|
PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign decode pad end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test size alignment
|
||||||
|
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 13 : 16;
|
||||||
|
|
||||||
|
for (size = TEST_LEN; size > 0; size -= align) {
|
||||||
|
while ((m = (rand() % MMAX)) < 2) ;
|
||||||
|
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||||
|
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// The matrix generated by gf_gen_cauchy1_matrix
|
||||||
|
// is always invertable.
|
||||||
|
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
// Generate g_tbls from encode matrix a
|
||||||
|
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||||
|
// Perform matrix dot_prod for EC encoding
|
||||||
|
// using g_tbls from encode matrix a
|
||||||
|
ec_encode_data_sse(size, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
|
|
||||||
|
// Random errors
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||||
|
// Generate decode matrix
|
||||||
|
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||||
|
invert_matrix, decode_index, src_err_list,
|
||||||
|
src_in_err, nerrs, nsrcerrs, k, m);
|
||||||
|
if (re != 0) {
|
||||||
|
printf("Fail to gf_gen_decode_matrix\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pack recovery array as list of valid sources
|
||||||
|
// Its order must be the same as the order
|
||||||
|
// to generate matrix b in gf_gen_decode_matrix
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
recov[i] = buffs[decode_index[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||||
|
ec_encode_data_sse(size, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], size)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (j = 0; j < nerrs; j++)
|
||||||
|
printf(" %d", src_err_list[j]);
|
||||||
|
printf(" - Index = ");
|
||||||
|
for (p = 0; p < k; p++)
|
||||||
|
printf(" %d", decode_index[p]);
|
||||||
|
printf("\nencode_matrix:\n");
|
||||||
|
dump_u8xu8((unsigned char *)encode_matrix, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((unsigned char *)invert_matrix, k, k);
|
||||||
|
printf("\ndecode_matrix:\n");
|
||||||
|
dump_u8xu8((unsigned char *)decode_matrix, m, k);
|
||||||
|
printf("orig data:\n");
|
||||||
|
dump_matrix(buffs, m, 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buffs[k + i], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("done EC tests: Pass\n");
|
||||||
|
return 0;
|
||||||
|
}
|
763
erasure/src/erasure-code-test.c
Normal file
763
erasure/src/erasure-code-test.c
Normal file
@ -0,0 +1,763 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/types.h"
|
||||||
|
|
||||||
|
#define TEST_LEN 8192
|
||||||
|
#define TEST_SIZE (TEST_LEN/2)
|
||||||
|
|
||||||
|
#ifndef TEST_SOURCES
|
||||||
|
# define TEST_SOURCES 127
|
||||||
|
#endif
|
||||||
|
#ifndef RANDOMS
|
||||||
|
# define RANDOMS 200
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define MMAX TEST_SOURCES
|
||||||
|
#define KMAX TEST_SOURCES
|
||||||
|
|
||||||
|
#define EFENCE_TEST_MIN_SIZE 16
|
||||||
|
|
||||||
|
#ifdef EC_ALIGNED_ADDR
|
||||||
|
// Define power of 2 range to check ptr, len alignment
|
||||||
|
# define PTR_ALIGN_CHK_B 0
|
||||||
|
# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
|
||||||
|
#else
|
||||||
|
// Define power of 2 range to check ptr, len alignment
|
||||||
|
# define PTR_ALIGN_CHK_B 32
|
||||||
|
# define LEN_ALIGN_CHK_B 32 // 0 for aligned only
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef TEST_SEED
|
||||||
|
#define TEST_SEED 11
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef unsigned char u8;
|
||||||
|
|
||||||
|
/*
 * Print the first len bytes of buf as hexadecimal values.
 * A line break is emitted after every 32nd value, and one more newline
 * always terminates the output.
 */
void dump(unsigned char *buf, int len)
{
	int pos = 0;

	while (pos < len) {
		printf(" %2x", 0xff & buf[pos]);
		pos++;
		if ((pos % 32) == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print a matrix stored as an array of row pointers.
 * The first m bytes of each of the first k rows of s are printed in hex,
 * one row per line, followed by an empty line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print a k x m byte matrix stored contiguously in row-major order.
 * Each row is printed in hex on its own line, followed by an empty line.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		for (col = 0; col < m; col++) {
			int idx = (row * m) + col;

			printf(" %2x", 0xff & s[idx]);
		}
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
// Generate Random errors
|
||||||
|
static void gen_err_list(unsigned char *src_err_list,
|
||||||
|
unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m)
|
||||||
|
{
|
||||||
|
int i, err;
|
||||||
|
int nerrs = 0, nsrcerrs = 0;
|
||||||
|
|
||||||
|
for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) {
|
||||||
|
err = 1 & rand();
|
||||||
|
src_in_err[i] = err;
|
||||||
|
if (err) {
|
||||||
|
src_err_list[nerrs++] = i;
|
||||||
|
if (i < k) {
|
||||||
|
nsrcerrs++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (nerrs == 0) { // should have at least one error
|
||||||
|
while ((err = (rand() % KMAX)) >= m) ;
|
||||||
|
src_err_list[nerrs++] = err;
|
||||||
|
src_in_err[err] = 1;
|
||||||
|
if (err < k)
|
||||||
|
nsrcerrs = 1;
|
||||||
|
}
|
||||||
|
*pnerrs = nerrs;
|
||||||
|
*pnsrcerrs = nsrcerrs;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define NO_INVERT_MATRIX -2
|
||||||
|
// Generate decode matrix from encode matrix
|
||||||
|
static int gf_gen_decode_matrix(unsigned char *encode_matrix,
|
||||||
|
unsigned char *decode_matrix,
|
||||||
|
unsigned char *invert_matrix,
|
||||||
|
unsigned int *decode_index,
|
||||||
|
unsigned char *src_err_list,
|
||||||
|
unsigned char *src_in_err,
|
||||||
|
int nerrs, int nsrcerrs, int k, int m)
|
||||||
|
{
|
||||||
|
int i, j, p;
|
||||||
|
int r;
|
||||||
|
unsigned char *backup, *b, s;
|
||||||
|
int incr = 0;
|
||||||
|
|
||||||
|
b = malloc(MMAX * KMAX);
|
||||||
|
backup = malloc(MMAX * KMAX);
|
||||||
|
|
||||||
|
if (b == NULL || backup == NULL) {
|
||||||
|
printf("Test failure! Error with malloc\n");
|
||||||
|
free(b);
|
||||||
|
free(backup);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Construct matrix b by removing error rows
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r])
|
||||||
|
r++;
|
||||||
|
for (j = 0; j < k; j++) {
|
||||||
|
b[k * i + j] = encode_matrix[k * r + j];
|
||||||
|
backup[k * i + j] = encode_matrix[k * r + j];
|
||||||
|
}
|
||||||
|
decode_index[i] = r;
|
||||||
|
}
|
||||||
|
incr = 0;
|
||||||
|
while (gf_invert_matrix(b, invert_matrix, k) < 0) {
|
||||||
|
if (nerrs == (m - k)) {
|
||||||
|
free(b);
|
||||||
|
free(backup);
|
||||||
|
printf("BAD MATRIX\n");
|
||||||
|
return NO_INVERT_MATRIX;
|
||||||
|
}
|
||||||
|
incr++;
|
||||||
|
memcpy(b, backup, MMAX * KMAX);
|
||||||
|
for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) {
|
||||||
|
if (src_err_list[i] == (decode_index[k - 1] + incr)) {
|
||||||
|
// skip the erased parity line
|
||||||
|
incr++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (decode_index[k - 1] + incr >= m) {
|
||||||
|
free(b);
|
||||||
|
free(backup);
|
||||||
|
printf("BAD MATRIX\n");
|
||||||
|
return NO_INVERT_MATRIX;
|
||||||
|
}
|
||||||
|
decode_index[k - 1] += incr;
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j];
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
for (i = 0; i < nsrcerrs; i++) {
|
||||||
|
for (j = 0; j < k; j++) {
|
||||||
|
decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* src_err_list from encode_matrix * invert of b for parity decoding */
|
||||||
|
for (p = nsrcerrs; p < nerrs; p++) {
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
s = 0;
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
s ^= gf_mul(invert_matrix[j * k + i],
|
||||||
|
encode_matrix[k * src_err_list[p] + j]);
|
||||||
|
|
||||||
|
decode_matrix[k * p + i] = s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(b);
|
||||||
|
free(backup);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int re = 0;
|
||||||
|
int i, j, p, rtest, m, k;
|
||||||
|
int nerrs, nsrcerrs;
|
||||||
|
void *buf;
|
||||||
|
unsigned int decode_index[MMAX];
|
||||||
|
unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
|
||||||
|
unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls;
|
||||||
|
unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES];
|
||||||
|
unsigned char *recov[TEST_SOURCES];
|
||||||
|
|
||||||
|
int rows, align, size;
|
||||||
|
unsigned char *efence_buffs[TEST_SOURCES];
|
||||||
|
unsigned int offset;
|
||||||
|
u8 *ubuffs[TEST_SOURCES];
|
||||||
|
u8 *temp_ubuffs[TEST_SOURCES];
|
||||||
|
|
||||||
|
printf("erasure_code_test: %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||||
|
srand(TEST_SEED);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
temp_buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test erasure code by encode and recovery
|
||||||
|
|
||||||
|
encode_matrix = malloc(MMAX * KMAX);
|
||||||
|
decode_matrix = malloc(MMAX * KMAX);
|
||||||
|
invert_matrix = malloc(MMAX * KMAX);
|
||||||
|
g_tbls = malloc(KMAX * TEST_SOURCES * 32);
|
||||||
|
if (encode_matrix == NULL || decode_matrix == NULL
|
||||||
|
|| invert_matrix == NULL || g_tbls == NULL) {
|
||||||
|
printf("Test failure! Error with malloc\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pick a first test
|
||||||
|
m = 9;
|
||||||
|
k = 5;
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// Generate encode matrix encode_matrix
|
||||||
|
// The matrix generated by gf_gen_rs_matrix
|
||||||
|
// is not always invertable.
|
||||||
|
gf_gen_rs_matrix(encode_matrix, m, k);
|
||||||
|
|
||||||
|
// Generate g_tbls from encode matrix encode_matrix
|
||||||
|
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||||
|
|
||||||
|
// Perform matrix dot_prod for EC encoding
|
||||||
|
// using g_tbls from encode matrix encode_matrix
|
||||||
|
ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
|
|
||||||
|
// Choose random buffers to be in erasure
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||||
|
|
||||||
|
// Generate decode matrix
|
||||||
|
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||||
|
invert_matrix, decode_index, src_err_list, src_in_err,
|
||||||
|
nerrs, nsrcerrs, k, m);
|
||||||
|
if (re != 0) {
|
||||||
|
printf("Fail to gf_gen_decode_matrix\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pack recovery array as list of valid sources
|
||||||
|
// Its order must be the same as the order
|
||||||
|
// to generate matrix b in gf_gen_decode_matrix
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
recov[i] = buffs[decode_index[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||||
|
ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (j = 0; j < nerrs; j++)
|
||||||
|
printf(" %d", src_err_list[j]);
|
||||||
|
printf(" - Index = ");
|
||||||
|
for (p = 0; p < k; p++)
|
||||||
|
printf(" %d", decode_index[p]);
|
||||||
|
printf("\nencode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||||
|
printf("\ndecode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buffs[k + i], 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pick a first test
|
||||||
|
m = 9;
|
||||||
|
k = 5;
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// The matrix generated by gf_gen_cauchy1_matrix
|
||||||
|
// is always invertable.
|
||||||
|
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||||
|
|
||||||
|
// Generate g_tbls from encode matrix encode_matrix
|
||||||
|
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||||
|
|
||||||
|
// Perform matrix dot_prod for EC encoding
|
||||||
|
// using g_tbls from encode matrix encode_matrix
|
||||||
|
ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
|
|
||||||
|
// Choose random buffers to be in erasure
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||||
|
|
||||||
|
// Generate decode matrix
|
||||||
|
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||||
|
invert_matrix, decode_index, src_err_list, src_in_err,
|
||||||
|
nerrs, nsrcerrs, k, m);
|
||||||
|
if (re != 0) {
|
||||||
|
printf("Fail to gf_gen_decode_matrix\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pack recovery array as list of valid sources
|
||||||
|
// Its order must be the same as the order
|
||||||
|
// to generate matrix b in gf_gen_decode_matrix
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
recov[i] = buffs[decode_index[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||||
|
ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (j = 0; j < nerrs; j++)
|
||||||
|
printf(" %d", src_err_list[j]);
|
||||||
|
printf(" - Index = ");
|
||||||
|
for (p = 0; p < k; p++)
|
||||||
|
printf(" %d", decode_index[p]);
|
||||||
|
printf("\nencode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||||
|
printf("\ndecode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buffs[k + i], 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do more random tests
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
while ((m = (rand() % MMAX)) < 2) ;
|
||||||
|
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||||
|
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// The matrix generated by gf_gen_cauchy1_matrix
|
||||||
|
// is always invertable.
|
||||||
|
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
// Generate g_tbls from encode matrix a
|
||||||
|
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||||
|
// Perform matrix dot_prod for EC encoding
|
||||||
|
// using g_tbls from encode matrix a
|
||||||
|
ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
|
|
||||||
|
// Random errors
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||||
|
|
||||||
|
// Generate decode matrix
|
||||||
|
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||||
|
invert_matrix, decode_index, src_err_list,
|
||||||
|
src_in_err, nerrs, nsrcerrs, k, m);
|
||||||
|
if (re != 0) {
|
||||||
|
printf("Fail to gf_gen_decode_matrix\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pack recovery array as list of valid sources
|
||||||
|
// Its order must be the same as the order
|
||||||
|
// to generate matrix b in gf_gen_decode_matrix
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
recov[i] = buffs[decode_index[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||||
|
ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (j = 0; j < nerrs; j++)
|
||||||
|
printf(" %d", src_err_list[j]);
|
||||||
|
printf(" - Index = ");
|
||||||
|
for (p = 0; p < k; p++)
|
||||||
|
printf(" %d", decode_index[p]);
|
||||||
|
printf("\nencode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||||
|
printf("\ndecode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||||
|
printf("orig data:\n");
|
||||||
|
dump_matrix(buffs, m, 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buffs[k + i], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run tests at end of buffer for Electric Fence
|
||||||
|
k = 16;
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
|
if (k > KMAX)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
for (rows = 1; rows <= 16; rows++) {
|
||||||
|
m = k + rows;
|
||||||
|
if (m > MMAX)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (size = EFENCE_TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||||
|
for (i = 0; i < m; i++) { // Line up TEST_SIZE from end
|
||||||
|
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The matrix generated by gf_gen_cauchy1_matrix
|
||||||
|
// is always invertable.
|
||||||
|
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
// Generate g_tbls from encode matrix a
|
||||||
|
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||||
|
// Perform matrix dot_prod for EC encoding
|
||||||
|
// using g_tbls from encode matrix a
|
||||||
|
ec_encode_data(size, k, m - k, g_tbls, efence_buffs, &efence_buffs[k]);
|
||||||
|
|
||||||
|
// Random errors
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||||
|
|
||||||
|
// Generate decode matrix
|
||||||
|
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||||
|
invert_matrix, decode_index, src_err_list,
|
||||||
|
src_in_err, nerrs, nsrcerrs, k, m);
|
||||||
|
if (re != 0) {
|
||||||
|
printf("Fail to gf_gen_decode_matrix\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pack recovery array as list of valid sources
|
||||||
|
// Its order must be the same as the order
|
||||||
|
// to generate matrix b in gf_gen_decode_matrix
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
recov[i] = efence_buffs[decode_index[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||||
|
ec_encode_data(size, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
if (0 !=
|
||||||
|
memcmp(temp_buffs[k + i], efence_buffs[src_err_list[i]],
|
||||||
|
size)) {
|
||||||
|
printf("Efence: Fail error recovery (%d, %d, %d)\n", m,
|
||||||
|
k, nerrs);
|
||||||
|
|
||||||
|
printf("size = %d\n", size);
|
||||||
|
|
||||||
|
printf("Test erase list = ");
|
||||||
|
for (j = 0; j < nerrs; j++)
|
||||||
|
printf(" %d", src_err_list[j]);
|
||||||
|
printf(" - Index = ");
|
||||||
|
for (p = 0; p < k; p++)
|
||||||
|
printf(" %d", decode_index[p]);
|
||||||
|
printf("\nencode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) encode_matrix, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((u8 *) invert_matrix, k, k);
|
||||||
|
printf("\ndecode_matrix:\n");
|
||||||
|
dump_u8xu8((u8 *) decode_matrix, m, k);
|
||||||
|
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buffs[k + i], align);
|
||||||
|
printf("orig :");
|
||||||
|
dump(efence_buffs[src_err_list[i]], align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test rand ptr alignment if available
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
while ((m = (rand() % MMAX)) < 2) ;
|
||||||
|
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||||
|
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15;
|
||||||
|
|
||||||
|
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||||
|
// Add random offsets
|
||||||
|
for (i = 0; i < m; i++) {
|
||||||
|
memset(buffs[i], 0, TEST_LEN); // zero pad to check write-over
|
||||||
|
memset(temp_buffs[i], 0, TEST_LEN); // zero pad to check write-over
|
||||||
|
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
temp_ubuffs[i] = temp_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
ubuffs[i][j] = rand();
|
||||||
|
|
||||||
|
// The matrix generated by gf_gen_cauchy1_matrix
|
||||||
|
// is always invertable.
|
||||||
|
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
// Generate g_tbls from encode matrix a
|
||||||
|
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||||
|
// Perform matrix dot_prod for EC encoding
|
||||||
|
// using g_tbls from encode matrix a
|
||||||
|
ec_encode_data(size, k, m - k, g_tbls, ubuffs, &ubuffs[k]);
|
||||||
|
|
||||||
|
// Random errors
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||||
|
|
||||||
|
// Generate decode matrix
|
||||||
|
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||||
|
invert_matrix, decode_index, src_err_list,
|
||||||
|
src_in_err, nerrs, nsrcerrs, k, m);
|
||||||
|
if (re != 0) {
|
||||||
|
printf("Fail to gf_gen_decode_matrix\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pack recovery array as list of valid sources
|
||||||
|
// Its order must be the same as the order
|
||||||
|
// to generate matrix b in gf_gen_decode_matrix
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
recov[i] = ubuffs[decode_index[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||||
|
ec_encode_data(size, k, nerrs, g_tbls, recov, &temp_ubuffs[k]);
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_ubuffs[k + i], ubuffs[src_err_list[i]], size)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (j = 0; j < nerrs; j++)
|
||||||
|
printf(" %d", src_err_list[j]);
|
||||||
|
printf(" - Index = ");
|
||||||
|
for (p = 0; p < k; p++)
|
||||||
|
printf(" %d", decode_index[p]);
|
||||||
|
printf("\nencode_matrix:\n");
|
||||||
|
dump_u8xu8((unsigned char *)encode_matrix, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((unsigned char *)invert_matrix, k, k);
|
||||||
|
printf("\ndecode_matrix:\n");
|
||||||
|
dump_u8xu8((unsigned char *)decode_matrix, m, k);
|
||||||
|
printf("orig data:\n");
|
||||||
|
dump_matrix(ubuffs, m, 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(ubuffs[src_err_list[i]], 25);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_ubuffs[k + i], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Confirm that padding around dests is unchanged
|
||||||
|
memset(temp_buffs[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||||
|
|
||||||
|
for (i = 0; i < m; i++) {
|
||||||
|
|
||||||
|
offset = ubuffs[i] - buffs[i];
|
||||||
|
|
||||||
|
if (memcmp(buffs[i], temp_buffs[0], offset)) {
|
||||||
|
printf("Fail rand ualign encode pad start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp
|
||||||
|
(buffs[i] + offset + size, temp_buffs[0],
|
||||||
|
PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign encode pad end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
offset = temp_ubuffs[k + i] - temp_buffs[k + i];
|
||||||
|
if (memcmp(temp_buffs[k + i], temp_buffs[0], offset)) {
|
||||||
|
printf("Fail rand ualign decode pad start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp
|
||||||
|
(temp_buffs[k + i] + offset + size, temp_buffs[0],
|
||||||
|
PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign decode pad end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test size alignment
|
||||||
|
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 13 : 16;
|
||||||
|
|
||||||
|
for (size = TEST_LEN; size > 0; size -= align) {
|
||||||
|
while ((m = (rand() % MMAX)) < 2) ;
|
||||||
|
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||||
|
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// The matrix generated by gf_gen_cauchy1_matrix
|
||||||
|
// is always invertable.
|
||||||
|
gf_gen_cauchy1_matrix(encode_matrix, m, k);
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
// Generate g_tbls from encode matrix a
|
||||||
|
ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
|
||||||
|
// Perform matrix dot_prod for EC encoding
|
||||||
|
// using g_tbls from encode matrix a
|
||||||
|
ec_encode_data(size, k, m - k, g_tbls, buffs, &buffs[k]);
|
||||||
|
|
||||||
|
// Random errors
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
|
||||||
|
// Generate decode matrix
|
||||||
|
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
|
||||||
|
invert_matrix, decode_index, src_err_list,
|
||||||
|
src_in_err, nerrs, nsrcerrs, k, m);
|
||||||
|
if (re != 0) {
|
||||||
|
printf("Fail to gf_gen_decode_matrix\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Pack recovery array as list of valid sources
|
||||||
|
// Its order must be the same as the order
|
||||||
|
// to generate matrix b in gf_gen_decode_matrix
|
||||||
|
for (i = 0; i < k; i++) {
|
||||||
|
recov[i] = buffs[decode_index[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
ec_init_tables(k, nerrs, decode_matrix, g_tbls);
|
||||||
|
ec_encode_data(size, k, nerrs, g_tbls, recov, &temp_buffs[k]);
|
||||||
|
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], size)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (j = 0; j < nerrs; j++)
|
||||||
|
printf(" %d", src_err_list[j]);
|
||||||
|
printf(" - Index = ");
|
||||||
|
for (p = 0; p < k; p++)
|
||||||
|
printf(" %d", decode_index[p]);
|
||||||
|
printf("\nencode_matrix:\n");
|
||||||
|
dump_u8xu8((unsigned char *)encode_matrix, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((unsigned char *)invert_matrix, k, k);
|
||||||
|
printf("\ndecode_matrix:\n");
|
||||||
|
dump_u8xu8((unsigned char *)decode_matrix, m, k);
|
||||||
|
printf("orig data:\n");
|
||||||
|
dump_matrix(buffs, m, 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buffs[k + i], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("done EC tests: Pass\n");
|
||||||
|
return 0;
|
||||||
|
}
|
234
erasure/src/gf-2vect-dot-prod-avx.asm
Normal file
234
erasure/src/gf-2vect-dot-prod-avx.asm
Normal file
@ -0,0 +1,234 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_2vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
;;; ELF64 output: map the abstract argument/temporary names onto registers
;;; and define the prologue/epilogue macros used by the function body.
%ifidn __OUTPUT_FORMAT__, elf64
 %define arg0   rdi
 %define arg1   rsi
 %define arg2   rdx
 %define arg3   rcx
 %define arg4   r8
 %define arg5   r9

 %define tmp    r11
 %define tmp2   r10
 %define tmp3   r9		; same register as arg5
 %define tmp4   r12		; must be saved and restored
 %define return rax
 %define PS     8
 %define LOG_PS 3		; log2(PS)

 ;; func(x) simply emits the label x:
 %define func(x) x:

 ;; Only r12 (tmp4) needs preserving in this configuration.
 %macro FUNC_SAVE 0
	push	r12
 %endmacro
 %macro FUNC_RESTORE 0
	pop	r12
 %endmacro
%endif
|
||||||
|
|
||||||
|
;;; Win64 output: map the abstract argument/temporary names onto registers
;;; and define the prologue/epilogue macros used by the function body.
%ifidn __OUTPUT_FORMAT__, win64
 %define arg0   rcx
 %define arg1   rdx
 %define arg2   r8
 %define arg3   r9

 %define arg4   r12		; must be saved, loaded and restored
 %define tmp    r11
 %define tmp2   r10
 %define tmp3   r13		; must be saved and restored
 %define tmp4   r14		; must be saved and restored
 %define return rax
 %define PS     8
 %define LOG_PS 3		; log2(PS)

 ;; Room for three xmm registers (xmm6-xmm8) and three GPRs (r12-r14).
 %define stack_size  3*16 + 3*8	; must be an odd multiple of 8
 ;; Stack-passed argument number x, addressed after the frame is allocated.
 %define arg(x)      [rsp + stack_size + PS + PS*x]

 %define func(x) proc_frame x

 ;; Allocate the frame, save xmm6-xmm8 and r12-r14, then load the fifth
 ;; argument from the stack into arg4 (r12).
 %macro FUNC_SAVE 0
	alloc_stack	stack_size
	save_xmm128	xmm6, 0*16
	save_xmm128	xmm7, 1*16
	save_xmm128	xmm8, 2*16
	save_reg	r12,  3*16 + 0*8
	save_reg	r13,  3*16 + 1*8
	save_reg	r14,  3*16 + 2*8
	end_prolog
	mov	arg4, arg(4)
 %endmacro

 ;; Reload everything FUNC_SAVE stored, in the same slots, and drop the frame.
 %macro FUNC_RESTORE 0
	vmovdqa	xmm6, [rsp + 0*16]
	vmovdqa	xmm7, [rsp + 1*16]
	vmovdqa	xmm8, [rsp + 2*16]
	mov	r12,  [rsp + 3*16 + 0*8]
	mov	r13,  [rsp + 3*16 + 1*8]
	mov	r14,  [rsp + 3*16 + 2*8]
	add	rsp, stack_size
 %endmacro
%endif
|
||||||
|
|
||||||
|
;;; Names for the five arguments of
;;; gf_2vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests)
%define len       arg0
%define vec       arg1
%define mul_array arg2
%define src       arg3
%define dest1     arg4

;;; Working registers
%define vec_i     tmp2
%define ptr       tmp3
%define dest2     tmp4
%define pos       return	; pos shares the return-value register

;;; Select the instructions used for source loads (XLDR) and
;;; destination stores (XSTR).
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
 %define XLDR vmovdqu
 %define XSTR vmovdqu
%else
;;; Use Non-temporal load/store
 %ifdef NO_NT_LDST
  %define XLDR vmovdqa
  %define XSTR vmovdqa
 %else
  %define XLDR vmovntdqa
  %define XSTR vmovntdq
 %endif
%endif
|
||||||
|
|
||||||
|
|
||||||
|
default rel

[bits 64]
section .text

%define xmask0f   xmm8
%define xgft1_lo  xmm7
%define xgft1_hi  xmm6
%define xgft2_lo  xmm5
%define xgft2_hi  xmm4

%define x0     xmm0
%define xtmpa  xmm1
%define xp1    xmm2
%define xp2    xmm3

;;; gf_2vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; For every 16-byte column of the `vec` source buffers, looks each source
;;; byte up (low and high nibble separately, via vpshufb) in two sets of
;;; 32-byte tables and XOR-accumulates the results into two destination
;;; buffers, dests[0] and dests[1].  The second table set starts vec*32
;;; bytes after the first.
;;;
;;; Returns 0 on success, 1 when len < 16.  When len is not a multiple of
;;; 16 the final 16 bytes are processed with one extra, overlapping pass.
align 16
global gf_2vect_dot_prod_avx:function

func(gf_2vect_dot_prod_avx)
	FUNC_SAVE
	sub	len, 16			;len is now the offset of the last full block
	jl	.return_fail
	xor	pos, pos
	vmovdqa	xmask0f, [mask0f]	;Load mask of lower nibble in each byte
	sal	vec, LOG_PS		;vec *= PS. Make vec_i count by PS
	mov	dest2, [dest1+PS]	;Read dests[1] before dest1 is overwritten
	mov	dest1, [dest1]

.loop16:
	vpxor	xp1, xp1		;Clear both accumulators
	vpxor	xp2, xp2
	mov	tmp, mul_array
	xor	vec_i, vec_i

.next_vect:
	mov	ptr, [src+vec_i]

	vmovdqu	xgft1_lo, [tmp]			;Load array Ax{00}, Ax{01}, ..., Ax{0f}
	vmovdqu	xgft1_hi, [tmp+16]		;     "     Ax{00}, Ax{10}, ..., Ax{f0}
	vmovdqu	xgft2_lo, [tmp+vec*(32/PS)]	;Load array Bx{00}, Bx{01}, ..., Bx{0f}
	vmovdqu	xgft2_hi, [tmp+vec*(32/PS)+16]	;     "     Bx{00}, Bx{10}, ..., Bx{f0}

	XLDR	x0, [ptr+pos]		;Get next source vector
	add	tmp, 32
	add	vec_i, PS

	vpand	xtmpa, x0, xmask0f	;Mask low src nibble in bits 3-0
	vpsraw	x0, x0, 4		;Shift to put high nibble into bits 3-0
	vpand	x0, x0, xmask0f		;Mask high src nibble in bits 3-0

	vpshufb	xgft1_hi, x0		;Lookup mul table of high nibble
	vpshufb	xgft1_lo, xtmpa		;Lookup mul table of low nibble
	vpxor	xgft1_hi, xgft1_lo	;GF add high and low partials
	vpxor	xp1, xgft1_hi		;xp1 += partial

	vpshufb	xgft2_hi, x0		;Lookup mul table of high nibble
	vpshufb	xgft2_lo, xtmpa		;Lookup mul table of low nibble
	vpxor	xgft2_hi, xgft2_lo	;GF add high and low partials
	vpxor	xp2, xgft2_hi		;xp2 += partial

	cmp	vec_i, vec
	jl	.next_vect

	XSTR	[dest1+pos], xp1
	XSTR	[dest2+pos], xp2

	add	pos, 16			;Loop on 16 bytes at a time
	cmp	pos, len
	jle	.loop16

	lea	tmp, [len + 16]		;tmp = original length
	cmp	pos, tmp
	je	.return_pass		;Length was a multiple of 16: done

	;; Tail len
	mov	pos, len		;Overlapped offset length-16
	jmp	.loop16			;Do one more overlap pass

.return_pass:
	mov	return, 0
	FUNC_RESTORE
	ret

.return_fail:
	mov	return, 1
	FUNC_RESTORE
	ret

endproc_frame
|
||||||
|
|
||||||
|
section .data

align 16
;;; 0x0f in every byte: selects the low nibble of each source byte.
mask0f:	ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f

;;; slversion name, a, b, c
;;; Exports two global labels, <name>_slver and <name>_slver_<a><b><c>, both
;;; addressing the same four bytes: the word 0x<c> followed by the bytes
;;; 0x<b> and 0x<a>.
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
	dw 0x%4
	db 0x%3, 0x%2
%endmacro
;;;       func                   core, ver, snum
slversion gf_2vect_dot_prod_avx, 02,   03,  0191
|
246
erasure/src/gf-2vect-dot-prod-avx2.asm
Normal file
246
erasure/src/gf-2vect-dot-prod-avx2.asm
Normal file
@ -0,0 +1,246 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_2vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
;;; Register aliases and prolog/epilog macros used when the output format is
;;; elf64. arg0..arg5 name the registers the function reads its arguments from.
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
;;; Scratch registers. tmp.w / tmp.b are the 32-bit and 8-bit views of tmp.
%define tmp r11
|
||||||
|
%define tmp.w r11d
|
||||||
|
%define tmp.b r11b
|
||||||
|
%define tmp2 r10
|
||||||
|
;;; tmp3 is the same register as arg5 (r9).
%define tmp3 r9
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
;;; PS is the pointer size in bytes and LOG_PS its base-2 logarithm.
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
;;; func(x) expands to a plain label for the function entry point.
%define func(x) x:
|
||||||
|
;;; FUNC_SAVE / FUNC_RESTORE push and pop r12, the only register saved here
;;; (it backs tmp4).
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
;;; Register aliases, stack frame layout and prolog/epilog macros used when the
;;; output format is win64. arg0..arg3 are read from registers; arg4 is loaded
;;; from the stack into r12 by FUNC_SAVE.
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
%define arg0 rcx
|
||||||
|
%define arg1 rdx
|
||||||
|
%define arg2 r8
|
||||||
|
%define arg3 r9
|
||||||
|
|
||||||
|
%define arg4 r12 ; must be saved, loaded and restored
|
||||||
|
;;; Scratch registers. tmp.w / tmp.b are the 32-bit and 8-bit views of tmp.
%define tmp r11
|
||||||
|
%define tmp.w r11d
|
||||||
|
%define tmp.b r11b
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r14 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
;;; PS is the pointer size in bytes and LOG_PS its base-2 logarithm.
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
;;; Frame layout: three 16-byte slots for xmm6-xmm8 at offsets 0..47, then
;;; three 8-byte slots for r12-r14 at offsets 48..71.
%define stack_size 3*16 + 3*8 ; must be an odd multiple of 8
|
||||||
|
;;; arg(x) addresses the x-th argument slot above the local frame; the extra
;;; PS skips one pointer-sized slot (presumably the return address).
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||||
|
|
||||||
|
%define func(x) proc_frame x
|
||||||
|
;;; FUNC_SAVE: allocate the frame, spill xmm6-xmm8 and r12-r14 into it, end
;;; the prolog, then load the fifth argument from its stack slot into arg4.
;;; alloc_stack / save_reg / end_prolog are not defined in this part of the
;;; file.
%macro FUNC_SAVE 0
|
||||||
|
alloc_stack stack_size
|
||||||
|
vmovdqa [rsp + 0*16], xmm6
|
||||||
|
vmovdqa [rsp + 1*16], xmm7
|
||||||
|
vmovdqa [rsp + 2*16], xmm8
|
||||||
|
save_reg r12, 3*16 + 0*8
|
||||||
|
save_reg r13, 3*16 + 1*8
|
||||||
|
save_reg r14, 3*16 + 2*8
|
||||||
|
end_prolog
|
||||||
|
mov arg4, arg(4)
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
;;; FUNC_RESTORE: reload xmm6-xmm8 and r12-r14 from the slots FUNC_SAVE wrote,
;;; then release the frame.
%macro FUNC_RESTORE 0
|
||||||
|
vmovdqa xmm6, [rsp + 0*16]
|
||||||
|
vmovdqa xmm7, [rsp + 1*16]
|
||||||
|
vmovdqa xmm8, [rsp + 2*16]
|
||||||
|
mov r12, [rsp + 3*16 + 0*8]
|
||||||
|
mov r13, [rsp + 3*16 + 1*8]
|
||||||
|
mov r14, [rsp + 3*16 + 2*8]
|
||||||
|
add rsp, stack_size
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%define len arg0
|
||||||
|
%define vec arg1
|
||||||
|
%define mul_array arg2
|
||||||
|
%define src arg3
|
||||||
|
%define dest1 arg4
|
||||||
|
|
||||||
|
%define vec_i tmp2
|
||||||
|
%define ptr tmp3
|
||||||
|
%define dest2 tmp4
|
||||||
|
%define pos return
|
||||||
|
|
||||||
|
%ifndef EC_ALIGNED_ADDR
|
||||||
|
;;; Use Un-aligned load/store
|
||||||
|
%define XLDR vmovdqu
|
||||||
|
%define XSTR vmovdqu
|
||||||
|
%else
|
||||||
|
|
||||||
|
;;; Use aligned load/store (NO_NT_LDST) or non-temporal load/store
|
||||||
|
%ifdef NO_NT_LDST
|
||||||
|
%define XLDR vmovdqa
|
||||||
|
%define XSTR vmovdqa
|
||||||
|
%else
|
||||||
|
%define XLDR vmovntdqa
|
||||||
|
%define XSTR vmovntdq
|
||||||
|
%endif
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
default rel
|
||||||
|
|
||||||
|
[bits 64]
|
||||||
|
section .text
|
||||||
|
|
||||||
|
%define xmask0f ymm8
|
||||||
|
%define xmask0fx xmm8
|
||||||
|
%define xgft1_lo ymm7
|
||||||
|
%define xgft1_hi ymm6
|
||||||
|
%define xgft2_lo ymm5
|
||||||
|
%define xgft2_hi ymm4
|
||||||
|
|
||||||
|
%define x0 ymm0
|
||||||
|
%define xtmpa ymm1
|
||||||
|
%define xp1 ymm2
|
||||||
|
%define xp2 ymm3
|
||||||
|
|
||||||
|
align 16
|
||||||
|
global gf_2vect_dot_prod_avx2:function
|
||||||
|
|
||||||
|
;;; gf_2vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; Computes two GF dot products over the same set of source buffers, 32 bytes
;;; per pass, and stores them through dests[0] and dests[1].
;;;
;;;   len    - number of bytes to process; must be at least 32
;;;   vec    - number of source buffers
;;;   g_tbls - 32 bytes of lookup table per source (16-byte low-nibble table
;;;            followed by 16-byte high-nibble table). The tables for dests[0]
;;;            come first; the tables for dests[1] start 32*vec bytes later.
;;;   buffs  - array of vec source pointers
;;;   dests  - array of two destination pointers
;;;
;;; Returns 0 on success and 1 when len < 32.
;;; A length that is not a multiple of 32 is handled by one extra pass over
;;; the last 32 bytes, which overlaps bytes already written.
func(gf_2vect_dot_prod_avx2)
	FUNC_SAVE
	sub	len, 32			;len now holds the offset of the last full 32-byte chunk
	jl	.return_fail
	xor	pos, pos
	mov	tmp.b, 0x0f
	vpinsrb	xmask0fx, xmask0fx, tmp.w, 0
	vpbroadcastb xmask0f, xmask0fx	;Construct mask 0x0f0f0f...

	sal	vec, LOG_PS		;vec *= PS. Make vec_i count by PS
	mov	dest2, [dest1+PS]	;dest1 still points at the dests array here
	mov	dest1, [dest1]

.loop32:
	vpxor	xp1, xp1		;Clear both accumulators for this chunk
	vpxor	xp2, xp2
	mov	tmp, mul_array
	xor	vec_i, vec_i

.next_vect:
	mov	ptr, [src+vec_i]

	vmovdqu	xgft1_lo, [tmp]		;Load array Ax{00}, Ax{01}, ..., Ax{0f}
					;     "     Ax{00}, Ax{10}, ..., Ax{f0}
	vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi
	vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo

	vmovdqu	xgft2_lo, [tmp+vec*(32/PS)]	;Load array Bx{00}, Bx{01}, ..., Bx{0f}
						;     "     Bx{00}, Bx{10}, ..., Bx{f0}
	vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
	vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo

	XLDR	x0, [ptr+pos]		;Get next source vector
	add	tmp, 32
	add	vec_i, PS

	vpand	xtmpa, x0, xmask0f	;Mask low src nibble in bits 3-0
	vpsraw	x0, x0, 4		;Shift to put high nibble into bits 3-0
	vpand	x0, x0, xmask0f		;Mask high src nibble in bits 3-0

	vpshufb	xgft1_hi, x0		;Lookup mul table of high nibble
	vpshufb	xgft1_lo, xtmpa		;Lookup mul table of low nibble
	vpxor	xgft1_hi, xgft1_lo	;GF add high and low partials
	vpxor	xp1, xgft1_hi		;xp1 += partial

	vpshufb	xgft2_hi, x0		;Lookup mul table of high nibble
	vpshufb	xgft2_lo, xtmpa		;Lookup mul table of low nibble
	vpxor	xgft2_hi, xgft2_lo	;GF add high and low partials
	vpxor	xp2, xgft2_hi		;xp2 += partial

	cmp	vec_i, vec
	jl	.next_vect

	XSTR	[dest1+pos], xp1
	XSTR	[dest2+pos], xp2

	add	pos, 32			;Loop on 32 bytes at a time
	cmp	pos, len
	jle	.loop32

	lea	tmp, [len + 32]		;tmp = original length
	cmp	pos, tmp
	je	.return_pass		;Done when pos landed exactly on the end

	;; Tail len
	mov	pos, len		;Overlapped offset length-32
	jmp	.loop32			;Do one more overlap pass

.return_pass:
	mov	return, 0
	FUNC_RESTORE
	ret

.return_fail:
	mov	return, 1
	FUNC_RESTORE
	ret

endproc_frame
|
||||||
|
|
||||||
|
section .data
|
||||||
|
|
||||||
|
;;; slversion func, core, ver, snum
;;; Exports two global labels, <func>_slver and <func>_slver_<core><ver><snum>,
;;; which both mark the same location, and emits four bytes of data there:
;;; the 16-bit word 0x<snum> followed by the bytes 0x<ver> and 0x<core>.
;;; The arguments are pasted after "0x", so they must be written as hex digits.
%macro slversion 4
|
||||||
|
global %1_slver_%2%3%4
|
||||||
|
global %1_slver
|
||||||
|
%1_slver:
|
||||||
|
%1_slver_%2%3%4:
|
||||||
|
dw 0x%4
|
||||||
|
db 0x%3, 0x%2
|
||||||
|
%endmacro
|
||||||
|
;;; func core, ver, snum
|
||||||
|
slversion gf_2vect_dot_prod_avx2, 04, 03, 0196
|
216
erasure/src/gf-2vect-dot-prod-sse-perf.c
Normal file
216
erasure/src/gf-2vect-dot-prod-sse-perf.c
Normal file
@ -0,0 +1,216 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/tests.h"
|
||||||
|
|
||||||
|
#ifndef FUNCTION_UNDER_TEST
|
||||||
|
# define FUNCTION_UNDER_TEST gf_2vect_dot_prod_sse
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define str(s) #s
|
||||||
|
#define xstr(s) str(s)
|
||||||
|
|
||||||
|
//#define CACHED_TEST
|
||||||
|
#ifdef CACHED_TEST
|
||||||
|
// Cached test, loop many times over small dataset
|
||||||
|
# define TEST_SOURCES 10
|
||||||
|
# define TEST_LEN 8*1024
|
||||||
|
# define TEST_LOOPS 40000
|
||||||
|
# define TEST_TYPE_STR "_warm"
|
||||||
|
#else
|
||||||
|
# ifndef TEST_CUSTOM
|
||||||
|
// Uncached test. Pull from large mem base.
|
||||||
|
# define TEST_SOURCES 10
|
||||||
|
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
|
||||||
|
# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
|
||||||
|
# define TEST_LOOPS 100
|
||||||
|
# define TEST_TYPE_STR "_cold"
|
||||||
|
# else
|
||||||
|
# define TEST_TYPE_STR "_cus"
|
||||||
|
# ifndef TEST_LOOPS
|
||||||
|
# define TEST_LOOPS 1000
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef unsigned char u8;
|
||||||
|
|
||||||
|
// Print the first len bytes of buf to stdout as hex, each value preceded by a
// space and padded to width 2. A newline is emitted after every 32 values and
// once more at the end. Nothing but the final newline is printed if len <= 0.
void dump(unsigned char *buf, int len)
{
	for (int i = 0; i < len; i++) {
		// %x expects unsigned int; the cast makes the argument type exact.
		printf(" %2x", (unsigned int)buf[i]);
		if ((i + 1) % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
// Print a k-row by m-column byte matrix to stdout as hex, one row per line,
// followed by a blank line. s[i] points at row i, which must hold at least
// m bytes.
void dump_matrix(unsigned char **s, int k, int m)
{
	for (int row = 0; row < k; row++) {
		for (int col = 0; col < m; col++) {
			// %x expects unsigned int; the cast makes the argument type exact.
			printf(" %2x", (unsigned int)s[row][col]);
		}
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
void *buf;
|
||||||
|
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g_tbls[2 * TEST_SOURCES * 32];
|
||||||
|
u8 *dest1, *dest2, *dest_ref1, *dest_ref2, *dest_ptrs[2];
|
||||||
|
u8 *buffs[TEST_SOURCES];
|
||||||
|
struct perf start, stop;
|
||||||
|
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest2 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref2 = buf;
|
||||||
|
|
||||||
|
dest_ptrs[0] = dest1;
|
||||||
|
dest_ptrs[1] = dest2;
|
||||||
|
|
||||||
|
// Performance test
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
memset(dest1, 0, TEST_LEN);
|
||||||
|
memset(dest2, 0, TEST_LEN);
|
||||||
|
memset(dest_ref1, 0, TEST_LEN);
|
||||||
|
memset(dest_ref2, 0, TEST_LEN);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++) {
|
||||||
|
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||||
|
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref2);
|
||||||
|
|
||||||
|
#ifdef DO_REF_PERF
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS / 100; i++) {
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++) {
|
||||||
|
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||||
|
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref2);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf("gf_2vect_dot_prod_base" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 2) * i);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS; i++) {
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++) {
|
||||||
|
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||||
|
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 2) * i);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("pass perf check\n");
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
}
|
477
erasure/src/gf-2vect-dot-prod-sse-test.c
Normal file
477
erasure/src/gf-2vect-dot-prod-sse-test.c
Normal file
@ -0,0 +1,477 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/types.h"
|
||||||
|
|
||||||
|
#ifndef FUNCTION_UNDER_TEST
|
||||||
|
# define FUNCTION_UNDER_TEST gf_2vect_dot_prod_sse
|
||||||
|
#endif
|
||||||
|
#ifndef TEST_MIN_SIZE
|
||||||
|
# define TEST_MIN_SIZE 16
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define str(s) #s
|
||||||
|
#define xstr(s) str(s)
|
||||||
|
|
||||||
|
#define TEST_LEN 8192
|
||||||
|
#define TEST_SIZE (TEST_LEN/2)
|
||||||
|
#define TEST_MEM TEST_SIZE
|
||||||
|
#define TEST_LOOPS 10000
|
||||||
|
#define TEST_TYPE_STR ""
|
||||||
|
|
||||||
|
#ifndef TEST_SOURCES
|
||||||
|
# define TEST_SOURCES 16
|
||||||
|
#endif
|
||||||
|
#ifndef RANDOMS
|
||||||
|
# define RANDOMS 20
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef EC_ALIGNED_ADDR
|
||||||
|
// Define power of 2 range to check ptr, len alignment
|
||||||
|
# define PTR_ALIGN_CHK_B 0
|
||||||
|
# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
|
||||||
|
#else
|
||||||
|
// Define power of 2 range to check ptr, len alignment
|
||||||
|
# define PTR_ALIGN_CHK_B 32
|
||||||
|
# define LEN_ALIGN_CHK_B 32 // 0 for aligned only
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef unsigned char u8;
|
||||||
|
|
||||||
|
// Print the first len bytes of buf to stdout as hex, each value preceded by a
// space and padded to width 2. A newline is emitted after every 32 values and
// once more at the end. Nothing but the final newline is printed if len <= 0.
void dump(unsigned char *buf, int len)
{
	for (int i = 0; i < len; i++) {
		// %x expects unsigned int; the cast makes the argument type exact.
		printf(" %2x", (unsigned int)buf[i]);
		if ((i + 1) % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
// Print a k-row by m-column byte matrix to stdout as hex, one row per line,
// followed by a blank line. s[i] points at row i, which must hold at least
// m bytes.
void dump_matrix(unsigned char **s, int k, int m)
{
	for (int row = 0; row < k; row++) {
		for (int col = 0; col < m; col++) {
			// %x expects unsigned int; the cast makes the argument type exact.
			printf(" %2x", (unsigned int)s[row][col]);
		}
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
// Print a k-row by m-column byte matrix stored contiguously in row-major order
// (element (i, j) at s[j + i * m]) to stdout as hex, one row per line,
// followed by a blank line.
void dump_u8xu8(unsigned char *s, int k, int m)
{
	for (int row = 0; row < k; row++) {
		for (int col = 0; col < m; col++) {
			// %x expects unsigned int; the cast makes the argument type exact.
			printf(" %2x", (unsigned int)s[col + (row * m)]);
		}
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j, rtest, srcs;
|
||||||
|
void *buf;
|
||||||
|
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g_tbls[2 * TEST_SOURCES * 32];
|
||||||
|
u8 *dest1, *dest2, *dest_ref1, *dest_ref2, *dest_ptrs[2];
|
||||||
|
u8 *buffs[TEST_SOURCES];
|
||||||
|
|
||||||
|
int align, size;
|
||||||
|
unsigned char *efence_buffs[TEST_SOURCES];
|
||||||
|
unsigned int offset;
|
||||||
|
u8 *ubuffs[TEST_SOURCES];
|
||||||
|
u8 *udest_ptrs[2];
|
||||||
|
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest2 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref2 = buf;
|
||||||
|
|
||||||
|
dest_ptrs[0] = dest1;
|
||||||
|
dest_ptrs[1] = dest2;
|
||||||
|
|
||||||
|
// Test of all zeros
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
memset(buffs[i], 0, TEST_LEN);
|
||||||
|
|
||||||
|
memset(dest1, 0, TEST_LEN);
|
||||||
|
memset(dest2, 0, TEST_LEN);
|
||||||
|
memset(dest_ref1, 0, TEST_LEN);
|
||||||
|
memset(dest_ref2, 0, TEST_LEN);
|
||||||
|
memset(g1, 2, TEST_SOURCES);
|
||||||
|
memset(g2, 1, TEST_SOURCES);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref2);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
|
||||||
|
// Rand data test
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref2);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rand data test with varied parameters
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs,
|
||||||
|
dest_ref2);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test1 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test2 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run tests at end of buffer for Electric Fence
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
|
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||||
|
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||||
|
efence_buffs, dest_ref2);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test rand ptr alignment if available
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||||
|
srcs = rand() % TEST_SOURCES;
|
||||||
|
if (srcs == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||||
|
// Add random offsets
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
|
||||||
|
udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
|
||||||
|
memset(dest1, 0, TEST_LEN); // zero pad to check write-over
|
||||||
|
memset(dest2, 0, TEST_LEN);
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
ubuffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs);
|
||||||
|
|
||||||
|
if (memcmp(dest_ref1, udest_ptrs[0], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[0], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref2, udest_ptrs[1], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[1], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Confirm that padding around dests is unchanged
|
||||||
|
memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||||
|
offset = udest_ptrs[0] - dest1;
|
||||||
|
|
||||||
|
if (memcmp(dest1, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad1 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad1 end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = udest_ptrs[1] - dest2;
|
||||||
|
if (memcmp(dest2, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad2 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad2 end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test all size alignment
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
|
|
||||||
|
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||||
|
srcs = TEST_SOURCES;
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (memcmp(dest_ref1, dest_ptrs[0], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[0], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref2, dest_ptrs[1], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[1], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Pass\n");
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
}
|
236
erasure/src/gf-2vect-dot-prod-sse.asm
Normal file
236
erasure/src/gf-2vect-dot-prod-sse.asm
Normal file
@ -0,0 +1,236 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_2vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r9
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
%define arg0 rcx
|
||||||
|
%define arg1 rdx
|
||||||
|
%define arg2 r8
|
||||||
|
%define arg3 r9
|
||||||
|
|
||||||
|
%define arg4 r12 ; must be saved, loaded and restored
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r14 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
%define stack_size 3*16 + 3*8 ; must be an odd multiple of 8
|
||||||
|
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||||
|
|
||||||
|
%define func(x) proc_frame x
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
alloc_stack stack_size
|
||||||
|
save_xmm128 xmm6, 0*16
|
||||||
|
save_xmm128 xmm7, 1*16
|
||||||
|
save_xmm128 xmm8, 2*16
|
||||||
|
save_reg r12, 3*16 + 0*8
|
||||||
|
save_reg r13, 3*16 + 1*8
|
||||||
|
save_reg r14, 3*16 + 2*8
|
||||||
|
end_prolog
|
||||||
|
mov arg4, arg(4)
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
movdqa xmm6, [rsp + 0*16]
|
||||||
|
movdqa xmm7, [rsp + 1*16]
|
||||||
|
movdqa xmm8, [rsp + 2*16]
|
||||||
|
mov r12, [rsp + 3*16 + 0*8]
|
||||||
|
mov r13, [rsp + 3*16 + 1*8]
|
||||||
|
mov r14, [rsp + 3*16 + 2*8]
|
||||||
|
add rsp, stack_size
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%define len arg0
|
||||||
|
%define vec arg1
|
||||||
|
%define mul_array arg2
|
||||||
|
%define src arg3
|
||||||
|
%define dest1 arg4
|
||||||
|
|
||||||
|
%define vec_i tmp2
|
||||||
|
%define ptr tmp3
|
||||||
|
%define dest2 tmp4
|
||||||
|
%define pos return
|
||||||
|
|
||||||
|
%ifndef EC_ALIGNED_ADDR
|
||||||
|
;;; Use Un-aligned load/store
|
||||||
|
%define XLDR movdqu
|
||||||
|
%define XSTR movdqu
|
||||||
|
%else
|
||||||
|
|
||||||
|
;;; Use aligned load/store; non-temporal unless NO_NT_LDST is defined
|
||||||
|
%ifdef NO_NT_LDST
|
||||||
|
%define XLDR movdqa
|
||||||
|
%define XSTR movdqa
|
||||||
|
%else
|
||||||
|
%define XLDR movntdqa
|
||||||
|
%define XSTR movntdq
|
||||||
|
%endif
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
default rel
|
||||||
|
|
||||||
|
[bits 64]
|
||||||
|
section .text
|
||||||
|
|
||||||
|
%define xmask0f xmm8
|
||||||
|
%define xgft1_lo xmm7
|
||||||
|
%define xgft1_hi xmm6
|
||||||
|
%define xgft2_lo xmm5
|
||||||
|
%define xgft2_hi xmm4
|
||||||
|
|
||||||
|
%define x0 xmm0
|
||||||
|
%define xtmpa xmm1
|
||||||
|
%define xp1 xmm2
|
||||||
|
%define xp2 xmm3
|
||||||
|
|
||||||
|
align 16
|
||||||
|
global gf_2vect_dot_prod_sse:function
|
||||||
|
|
||||||
|
;;; gf_2vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; Works through the sources 16 bytes at a time. For every source vector the
;;; low and high nibble of each byte index a pair of 16-byte lookup tables
;;; (pshufb); the two lookups are XORed together and XOR-accumulated into one
;;; partial result per destination. Two table sets are used: the first starts
;;; at mul_array, the second 32*vec bytes after it (vec is pre-scaled by PS,
;;; hence the vec*(32/PS) addressing). Each source advances tmp by 32 bytes.
;;;
;;; dests is read as an array of two pointers (dest1, dest2).
;;; Returns 0 in `return` on success, or 1 when len is less than 16.
;;; A length that is not a multiple of 16 is completed with one extra pass at
;;; offset length-16, which overlaps bytes that were already written.
func(gf_2vect_dot_prod_sse)
|
||||||
|
FUNC_SAVE
|
||||||
|
sub len, 16
|
||||||
|
jl .return_fail
|
||||||
|
xor pos, pos
|
||||||
|
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||||
|
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||||
|
mov dest2, [dest1+PS]
|
||||||
|
mov dest1, [dest1]
|
||||||
|
|
||||||
|
;; Outer loop: one 16-byte column of output per pass, pos is the byte offset
.loop16
|
||||||
|
pxor xp1, xp1
|
||||||
|
pxor xp2, xp2
|
||||||
|
mov tmp, mul_array
|
||||||
|
xor vec_i, vec_i
|
||||||
|
|
||||||
|
;; Inner loop: fold in one source vector per pass, vec_i steps by PS
.next_vect
|
||||||
|
mov ptr, [src+vec_i]
|
||||||
|
|
||||||
|
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||||
|
movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||||
|
movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||||
|
movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||||
|
|
||||||
|
XLDR x0, [ptr+pos] ;Get next source vector
|
||||||
|
add tmp, 32
|
||||||
|
add vec_i, PS
|
||||||
|
|
||||||
|
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||||
|
psraw x0, 4 ;Shift to put high nibble into bits 3-0
|
||||||
|
pand x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||||
|
pand xtmpa, xmask0f ;Mask low src nibble in bits 3-0
|
||||||
|
|
||||||
|
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||||
|
pxor xp1, xgft1_hi ;xp1 += partial
|
||||||
|
|
||||||
|
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||||
|
pxor xp2, xgft2_hi ;xp2 += partial
|
||||||
|
|
||||||
|
cmp vec_i, vec
|
||||||
|
jl .next_vect
|
||||||
|
|
||||||
|
XSTR [dest1+pos], xp1
|
||||||
|
XSTR [dest2+pos], xp2
|
||||||
|
|
||||||
|
add pos, 16 ;Loop on 16 bytes at a time
|
||||||
|
cmp pos, len
|
||||||
|
jle .loop16
|
||||||
|
|
||||||
|
;; len holds length-16 here, so pos == len+16 means no tail is left
lea tmp, [len + 16]
|
||||||
|
cmp pos, tmp
|
||||||
|
je .return_pass
|
||||||
|
|
||||||
|
;; Tail len
|
||||||
|
mov pos, len ;Overlapped offset length-16
|
||||||
|
jmp .loop16 ;Do one more overlap pass
|
||||||
|
|
||||||
|
.return_pass:
|
||||||
|
mov return, 0
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
.return_fail:
|
||||||
|
mov return, 1
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
endproc_frame
|
||||||
|
|
||||||
|
section .data
|
||||||
|
|
||||||
|
align 16
|
||||||
|
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||||
|
|
||||||
|
%macro slversion 4
|
||||||
|
global %1_slver_%2%3%4
|
||||||
|
global %1_slver
|
||||||
|
%1_slver:
|
||||||
|
%1_slver_%2%3%4:
|
||||||
|
dw 0x%4
|
||||||
|
db 0x%3, 0x%2
|
||||||
|
%endmacro
|
||||||
|
;;; func core, ver, snum
|
||||||
|
slversion gf_2vect_dot_prod_sse, 00, 02, 0062
|
258
erasure/src/gf-3vect-dot-prod-avx.asm
Normal file
258
erasure/src/gf-3vect-dot-prod-avx.asm
Normal file
@ -0,0 +1,258 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_3vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
%define arg0 rcx
|
||||||
|
%define arg1 rdx
|
||||||
|
%define arg2 r8
|
||||||
|
%define arg3 r9
|
||||||
|
|
||||||
|
%define arg4 r12 ; must be saved, loaded and restored
|
||||||
|
%define arg5 r15 ; must be saved and restored
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r14 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
%define stack_size 6*16 + 5*8 ; must be an odd multiple of 8
|
||||||
|
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||||
|
|
||||||
|
%define func(x) proc_frame x
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
alloc_stack stack_size
|
||||||
|
save_xmm128 xmm6, 0*16
|
||||||
|
save_xmm128 xmm7, 1*16
|
||||||
|
save_xmm128 xmm8, 2*16
|
||||||
|
save_xmm128 xmm9, 3*16
|
||||||
|
save_xmm128 xmm10, 4*16
|
||||||
|
save_xmm128 xmm11, 5*16
|
||||||
|
save_reg r12, 6*16 + 0*8
|
||||||
|
save_reg r13, 6*16 + 1*8
|
||||||
|
save_reg r14, 6*16 + 2*8
|
||||||
|
save_reg r15, 6*16 + 3*8
|
||||||
|
end_prolog
|
||||||
|
mov arg4, arg(4)
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
vmovdqa xmm6, [rsp + 0*16]
|
||||||
|
vmovdqa xmm7, [rsp + 1*16]
|
||||||
|
vmovdqa xmm8, [rsp + 2*16]
|
||||||
|
vmovdqa xmm9, [rsp + 3*16]
|
||||||
|
vmovdqa xmm10, [rsp + 4*16]
|
||||||
|
vmovdqa xmm11, [rsp + 5*16]
|
||||||
|
mov r12, [rsp + 6*16 + 0*8]
|
||||||
|
mov r13, [rsp + 6*16 + 1*8]
|
||||||
|
mov r14, [rsp + 6*16 + 2*8]
|
||||||
|
mov r15, [rsp + 6*16 + 3*8]
|
||||||
|
add rsp, stack_size
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%define len arg0
|
||||||
|
%define vec arg1
|
||||||
|
%define mul_array arg2
|
||||||
|
%define src arg3
|
||||||
|
%define dest1 arg4
|
||||||
|
%define ptr arg5
|
||||||
|
%define vec_i tmp2
|
||||||
|
%define dest2 tmp3
|
||||||
|
%define dest3 tmp4
|
||||||
|
%define pos return
|
||||||
|
|
||||||
|
%ifndef EC_ALIGNED_ADDR
|
||||||
|
;;; Use Un-aligned load/store
|
||||||
|
%define XLDR vmovdqu
|
||||||
|
%define XSTR vmovdqu
|
||||||
|
%else
|
||||||
|
;;; Use aligned load/store; non-temporal unless NO_NT_LDST is defined
|
||||||
|
%ifdef NO_NT_LDST
|
||||||
|
%define XLDR vmovdqa
|
||||||
|
%define XSTR vmovdqa
|
||||||
|
%else
|
||||||
|
%define XLDR vmovntdqa
|
||||||
|
%define XSTR vmovntdq
|
||||||
|
%endif
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
default rel
|
||||||
|
|
||||||
|
[bits 64]
|
||||||
|
section .text
|
||||||
|
|
||||||
|
%define xmask0f xmm11
|
||||||
|
%define xgft1_lo xmm10
|
||||||
|
%define xgft1_hi xmm9
|
||||||
|
%define xgft2_lo xmm8
|
||||||
|
%define xgft2_hi xmm7
|
||||||
|
%define xgft3_lo xmm6
|
||||||
|
%define xgft3_hi xmm5
|
||||||
|
|
||||||
|
%define x0 xmm0
|
||||||
|
%define xtmpa xmm1
|
||||||
|
%define xp1 xmm2
|
||||||
|
%define xp2 xmm3
|
||||||
|
%define xp3 xmm4
|
||||||
|
|
||||||
|
align 16
|
||||||
|
global gf_3vect_dot_prod_avx:function
|
||||||
|
;;; gf_3vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; Works through the sources 16 bytes at a time. For every source vector the
;;; low and high nibble of each byte index a pair of 16-byte lookup tables
;;; (vpshufb); the two lookups are XORed together and XOR-accumulated into one
;;; partial result per destination. Three table sets are used: at mul_array,
;;; 32*vec bytes after it, and 64*vec bytes after it (vec is pre-scaled by PS,
;;; hence the vec*(32/PS) and vec*(64/PS) addressing). Each source advances
;;; tmp by 32 bytes.
;;;
;;; dests is read as an array of three pointers (dest1, dest2, dest3).
;;; Returns 0 in `return` on success, or 1 when len is less than 16.
;;; A length that is not a multiple of 16 is completed with one extra pass at
;;; offset length-16, which overlaps bytes that were already written.
func(gf_3vect_dot_prod_avx)
|
||||||
|
FUNC_SAVE
|
||||||
|
sub len, 16
|
||||||
|
jl .return_fail
|
||||||
|
xor pos, pos
|
||||||
|
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||||
|
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||||
|
mov dest2, [dest1+PS]
|
||||||
|
mov dest3, [dest1+2*PS]
|
||||||
|
mov dest1, [dest1]
|
||||||
|
|
||||||
|
|
||||||
|
;; Outer loop: one 16-byte column of output per pass, pos is the byte offset
.loop16:
|
||||||
|
vpxor xp1, xp1
|
||||||
|
vpxor xp2, xp2
|
||||||
|
vpxor xp3, xp3
|
||||||
|
mov tmp, mul_array
|
||||||
|
xor vec_i, vec_i
|
||||||
|
|
||||||
|
;; Inner loop: fold in one source vector per pass, vec_i steps by PS
.next_vect:
|
||||||
|
mov ptr, [src+vec_i]
|
||||||
|
|
||||||
|
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||||
|
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||||
|
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||||
|
vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||||
|
vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||||
|
vmovdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||||
|
|
||||||
|
add tmp, 32
|
||||||
|
add vec_i, PS
|
||||||
|
XLDR x0, [ptr+pos] ;Get next source vector
|
||||||
|
|
||||||
|
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0
|
||||||
|
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0
|
||||||
|
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||||
|
|
||||||
|
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||||
|
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||||
|
|
||||||
|
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||||
|
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||||
|
|
||||||
|
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||||
|
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||||
|
|
||||||
|
cmp vec_i, vec
|
||||||
|
jl .next_vect
|
||||||
|
|
||||||
|
XSTR [dest1+pos], xp1
|
||||||
|
XSTR [dest2+pos], xp2
|
||||||
|
XSTR [dest3+pos], xp3
|
||||||
|
|
||||||
|
add pos, 16 ;Loop on 16 bytes at a time
|
||||||
|
cmp pos, len
|
||||||
|
jle .loop16
|
||||||
|
|
||||||
|
;; len holds length-16 here, so pos == len+16 means no tail is left
lea tmp, [len + 16]
|
||||||
|
cmp pos, tmp
|
||||||
|
je .return_pass
|
||||||
|
|
||||||
|
;; Tail len
|
||||||
|
mov pos, len ;Overlapped offset length-16
|
||||||
|
jmp .loop16 ;Do one more overlap pass
|
||||||
|
|
||||||
|
.return_pass:
|
||||||
|
mov return, 0
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
.return_fail:
|
||||||
|
mov return, 1
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
endproc_frame
|
||||||
|
|
||||||
|
section .data
|
||||||
|
|
||||||
|
align 16
|
||||||
|
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||||
|
|
||||||
|
%macro slversion 4
|
||||||
|
global %1_slver_%2%3%4
|
||||||
|
global %1_slver
|
||||||
|
%1_slver:
|
||||||
|
%1_slver_%2%3%4:
|
||||||
|
dw 0x%4
|
||||||
|
db 0x%3, 0x%2
|
||||||
|
%endmacro
|
||||||
|
;;; func core, ver, snum
|
||||||
|
slversion gf_3vect_dot_prod_avx, 02, 03, 0192
|
271
erasure/src/gf-3vect-dot-prod-avx2.asm
Normal file
271
erasure/src/gf-3vect-dot-prod-avx2.asm
Normal file
@ -0,0 +1,271 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_3vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp.w r11d
|
||||||
|
%define tmp.b r11b
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
%define arg0 rcx
|
||||||
|
%define arg1 rdx
|
||||||
|
%define arg2 r8
|
||||||
|
%define arg3 r9
|
||||||
|
|
||||||
|
%define arg4 r12 ; must be saved, loaded and restored
|
||||||
|
%define arg5 r15 ; must be saved and restored
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp.w r11d
|
||||||
|
%define tmp.b r11b
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r14 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
%define stack_size 6*16 + 5*8 ; must be an odd multiple of 8
|
||||||
|
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||||
|
|
||||||
|
%define func(x) proc_frame x
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
alloc_stack stack_size
|
||||||
|
vmovdqa [rsp + 0*16], xmm6
|
||||||
|
vmovdqa [rsp + 1*16], xmm7
|
||||||
|
vmovdqa [rsp + 2*16], xmm8
|
||||||
|
vmovdqa [rsp + 3*16], xmm9
|
||||||
|
vmovdqa [rsp + 4*16], xmm10
|
||||||
|
vmovdqa [rsp + 5*16], xmm11
|
||||||
|
save_reg r12, 6*16 + 0*8
|
||||||
|
save_reg r13, 6*16 + 1*8
|
||||||
|
save_reg r14, 6*16 + 2*8
|
||||||
|
save_reg r15, 6*16 + 3*8
|
||||||
|
end_prolog
|
||||||
|
mov arg4, arg(4)
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
vmovdqa xmm6, [rsp + 0*16]
|
||||||
|
vmovdqa xmm7, [rsp + 1*16]
|
||||||
|
vmovdqa xmm8, [rsp + 2*16]
|
||||||
|
vmovdqa xmm9, [rsp + 3*16]
|
||||||
|
vmovdqa xmm10, [rsp + 4*16]
|
||||||
|
vmovdqa xmm11, [rsp + 5*16]
|
||||||
|
mov r12, [rsp + 6*16 + 0*8]
|
||||||
|
mov r13, [rsp + 6*16 + 1*8]
|
||||||
|
mov r14, [rsp + 6*16 + 2*8]
|
||||||
|
mov r15, [rsp + 6*16 + 3*8]
|
||||||
|
add rsp, stack_size
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%define len arg0
|
||||||
|
%define vec arg1
|
||||||
|
%define mul_array arg2
|
||||||
|
%define src arg3
|
||||||
|
%define dest1 arg4
|
||||||
|
%define ptr arg5
|
||||||
|
%define vec_i tmp2
|
||||||
|
%define dest2 tmp3
|
||||||
|
%define dest3 tmp4
|
||||||
|
%define pos return
|
||||||
|
|
||||||
|
%ifndef EC_ALIGNED_ADDR
|
||||||
|
;;; Use Un-aligned load/store
|
||||||
|
%define XLDR vmovdqu
|
||||||
|
%define XSTR vmovdqu
|
||||||
|
%else
|
||||||
|
;;; Use aligned load/store; non-temporal unless NO_NT_LDST is defined
|
||||||
|
%ifdef NO_NT_LDST
|
||||||
|
%define XLDR vmovdqa
|
||||||
|
%define XSTR vmovdqa
|
||||||
|
%else
|
||||||
|
%define XLDR vmovntdqa
|
||||||
|
%define XSTR vmovntdq
|
||||||
|
%endif
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
default rel
|
||||||
|
|
||||||
|
[bits 64]
|
||||||
|
section .text
|
||||||
|
|
||||||
|
%define xmask0f ymm11
|
||||||
|
%define xmask0fx xmm11
|
||||||
|
%define xgft1_lo ymm10
|
||||||
|
%define xgft1_hi ymm9
|
||||||
|
%define xgft2_lo ymm8
|
||||||
|
%define xgft2_hi ymm7
|
||||||
|
%define xgft3_lo ymm6
|
||||||
|
%define xgft3_hi ymm5
|
||||||
|
|
||||||
|
%define x0 ymm0
|
||||||
|
%define xtmpa ymm1
|
||||||
|
%define xp1 ymm2
|
||||||
|
%define xp2 ymm3
|
||||||
|
%define xp3 ymm4
|
||||||
|
|
||||||
|
align 16
|
||||||
|
global gf_3vect_dot_prod_avx2:function
|
||||||
|
;;; gf_3vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; Works through the sources 32 bytes at a time. Each 32-byte table entry is
;;; loaded into one ymm register and then split with vperm2i128 so that its
;;; low 16 bytes fill both lanes of the *_lo register and its high 16 bytes
;;; fill both lanes of the *_hi register. The low and high nibble of each
;;; source byte index those tables (vpshufb); the two lookups are XORed
;;; together and XOR-accumulated into one partial result per destination.
;;; Three table sets are used: at mul_array, 32*vec bytes after it, and 64*vec
;;; bytes after it (vec is pre-scaled by PS). Each source advances tmp by 32.
;;; The 0x0f nibble mask is built in registers instead of loaded from memory.
;;;
;;; dests is read as an array of three pointers (dest1, dest2, dest3).
;;; Returns 0 in `return` on success, or 1 when len is less than 32.
;;; A length that is not a multiple of 32 is completed with one extra pass at
;;; offset length-32, which overlaps bytes that were already written.
func(gf_3vect_dot_prod_avx2)
|
||||||
|
FUNC_SAVE
|
||||||
|
sub len, 32
|
||||||
|
jl .return_fail
|
||||||
|
xor pos, pos
|
||||||
|
mov tmp.b, 0x0f
|
||||||
|
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
|
||||||
|
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||||
|
|
||||||
|
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||||
|
mov dest2, [dest1+PS]
|
||||||
|
mov dest3, [dest1+2*PS]
|
||||||
|
mov dest1, [dest1]
|
||||||
|
|
||||||
|
|
||||||
|
;; Outer loop: one 32-byte column of output per pass, pos is the byte offset
.loop32:
|
||||||
|
vpxor xp1, xp1
|
||||||
|
vpxor xp2, xp2
|
||||||
|
vpxor xp3, xp3
|
||||||
|
mov tmp, mul_array
|
||||||
|
xor vec_i, vec_i
|
||||||
|
|
||||||
|
;; Inner loop: fold in one source vector per pass, vec_i steps by PS
.next_vect:
|
||||||
|
mov ptr, [src+vec_i]
|
||||||
|
|
||||||
|
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||||
|
; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||||
|
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x11 ; swapped to hi | hi
|
||||||
|
vperm2i128 xgft1_lo, xgft1_lo, xgft1_lo, 0x00 ; swapped to lo | lo
|
||||||
|
|
||||||
|
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||||
|
; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||||
|
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x11 ; swapped to hi | hi
|
||||||
|
vperm2i128 xgft2_lo, xgft2_lo, xgft2_lo, 0x00 ; swapped to lo | lo
|
||||||
|
|
||||||
|
vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||||
|
; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||||
|
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x11 ; swapped to hi | hi
|
||||||
|
vperm2i128 xgft3_lo, xgft3_lo, xgft3_lo, 0x00 ; swapped to lo | lo
|
||||||
|
|
||||||
|
add tmp, 32
|
||||||
|
add vec_i, PS
|
||||||
|
XLDR x0, [ptr+pos] ;Get next source vector
|
||||||
|
|
||||||
|
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0
|
||||||
|
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0
|
||||||
|
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||||
|
|
||||||
|
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||||
|
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||||
|
|
||||||
|
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||||
|
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||||
|
|
||||||
|
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||||
|
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||||
|
|
||||||
|
cmp vec_i, vec
|
||||||
|
jl .next_vect
|
||||||
|
|
||||||
|
XSTR [dest1+pos], xp1
|
||||||
|
XSTR [dest2+pos], xp2
|
||||||
|
XSTR [dest3+pos], xp3
|
||||||
|
|
||||||
|
add pos, 32 ;Loop on 32 bytes at a time
|
||||||
|
cmp pos, len
|
||||||
|
jle .loop32
|
||||||
|
|
||||||
|
;; len holds length-32 here, so pos == len+32 means no tail is left
lea tmp, [len + 32]
|
||||||
|
cmp pos, tmp
|
||||||
|
je .return_pass
|
||||||
|
|
||||||
|
;; Tail len
|
||||||
|
mov pos, len ;Overlapped offset length-32
|
||||||
|
jmp .loop32 ;Do one more overlap pass
|
||||||
|
|
||||||
|
.return_pass:
|
||||||
|
mov return, 0
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
.return_fail:
|
||||||
|
mov return, 1
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
endproc_frame
|
||||||
|
|
||||||
|
section .data
|
||||||
|
|
||||||
|
%macro slversion 4
|
||||||
|
global %1_slver_%2%3%4
|
||||||
|
global %1_slver
|
||||||
|
%1_slver:
|
||||||
|
%1_slver_%2%3%4:
|
||||||
|
dw 0x%4
|
||||||
|
db 0x%3, 0x%2
|
||||||
|
%endmacro
|
||||||
|
;;; func core, ver, snum
|
||||||
|
slversion gf_3vect_dot_prod_avx2, 04, 03, 0197
|
246
erasure/src/gf-3vect-dot-prod-sse-perf.c
Normal file
246
erasure/src/gf-3vect-dot-prod-sse-perf.c
Normal file
@ -0,0 +1,246 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/tests.h"
|
||||||
|
|
||||||
|
// Routine being benchmarked; may be overridden on the compiler command line.
#ifndef FUNCTION_UNDER_TEST
# define FUNCTION_UNDER_TEST gf_3vect_dot_prod_sse
#endif

// Two-level stringification: xstr() expands its argument before quoting it.
#define str(s) #s
#define xstr(s) str(s)

// Uncomment to benchmark a small working set that is looped over many times.
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES 10
# define TEST_LEN     (8*1024)	// parenthesized so it is safe inside any expression
# define TEST_LOOPS   40000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES 10
#  define GT_L3_CACHE  (32*1024*1024)	/* some number > last level cache */
// Per-source length: an equal share of GT_L3_CACHE, rounded down to a multiple of 64.
#  define TEST_LEN     ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
#  define TEST_LOOPS   100
#  define TEST_TYPE_STR "_cold"
# else
// Custom test: TEST_SOURCES and TEST_LEN are not defined here and must be
// supplied by the build; TEST_LOOPS falls back to 1000 when not supplied.
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#    define TEST_LOOPS 1000
#  endif
# endif
#endif

typedef unsigned char u8;
|
||||||
|
|
||||||
|
/*
 * Print the first len bytes of buf to stdout as hex values.
 * Each value is written as " %2x"; a newline is emitted after every
 * 32nd value and once more after the last value.
 */
void dump(unsigned char *buf, int len)
{
	int pos = 0;

	while (pos < len) {
		unsigned int value = buf[pos] & 0xff;

		pos++;
		printf(" %2x", value);
		// Break the row once 32 values have been printed on it
		if ((pos % 32) == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print a k x m window of an array of byte buffers to stdout.
 * Row r shows the first m bytes of s[r] as " %2x" values; each row ends
 * with a newline and a blank line follows the last row.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col;

		for (col = 0; col < m; col++)
			printf(" %2x", s[row][col]);
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
void *buf;
|
||||||
|
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
|
||||||
|
u8 g_tbls[3 * TEST_SOURCES * 32], *dest_ptrs[3], *buffs[TEST_SOURCES];
|
||||||
|
u8 *dest1, *dest2, *dest3, *dest_ref1, *dest_ref2, *dest_ref3;
|
||||||
|
struct perf start, stop;
|
||||||
|
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest2 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest3 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref2 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref3 = buf;
|
||||||
|
|
||||||
|
dest_ptrs[0] = dest1;
|
||||||
|
dest_ptrs[1] = dest2;
|
||||||
|
dest_ptrs[2] = dest3;
|
||||||
|
|
||||||
|
// Performance test
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
memset(dest1, 0, TEST_LEN);
|
||||||
|
memset(dest2, 0, TEST_LEN);
|
||||||
|
memset(dest_ref1, 0, TEST_LEN);
|
||||||
|
memset(dest_ref2, 0, TEST_LEN);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++) {
|
||||||
|
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||||
|
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref3);
|
||||||
|
|
||||||
|
#ifdef DO_REF_PERF
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS / 100; i++) {
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++) {
|
||||||
|
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||||
|
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref3);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf("gf_3vect_dot_prod_base" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 3) * i);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS; i++) {
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++) {
|
||||||
|
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||||
|
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 3) * i);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test3\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("pass perf check\n");
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
}
|
583
erasure/src/gf-3vect-dot-prod-sse-test.c
Normal file
583
erasure/src/gf-3vect-dot-prod-sse-test.c
Normal file
@ -0,0 +1,583 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/types.h"
|
||||||
|
|
||||||
|
// Routine under test and smallest length exercised; both may be overridden
// from the compiler command line.
#if !defined(FUNCTION_UNDER_TEST)
# define FUNCTION_UNDER_TEST gf_3vect_dot_prod_sse
#endif
#if !defined(TEST_MIN_SIZE)
# define TEST_MIN_SIZE 16
#endif

// xstr() expands its argument before str() turns it into a string literal
#define str(s) #s
#define xstr(s) str(s)

// Buffer length and derived sizes
#define TEST_LEN 8192
#define TEST_SIZE (TEST_LEN/2)
#define TEST_MEM TEST_SIZE
#define TEST_LOOPS 10000
#define TEST_TYPE_STR ""

// Number of source buffers and number of randomized rounds (overridable)
#if !defined(TEST_SOURCES)
# define TEST_SOURCES 16
#endif
#if !defined(RANDOMS)
# define RANDOMS 20
#endif

// Define power of 2 range to check ptr, len alignment
#if defined(EC_ALIGNED_ADDR)
# define PTR_ALIGN_CHK_B 0
# define LEN_ALIGN_CHK_B 0	// 0 for aligned only
#else
# define PTR_ALIGN_CHK_B 32
# define LEN_ALIGN_CHK_B 32	// 0 for aligned only
#endif

typedef unsigned char u8;
|
||||||
|
|
||||||
|
/*
 * Print the first len bytes of buf to stdout as hex values.
 * Each value is written as " %2x"; a newline is emitted after every
 * 32nd value and once more after the last value.
 */
void dump(unsigned char *buf, int len)
{
	int pos = 0;

	while (pos < len) {
		unsigned int value = buf[pos] & 0xff;

		pos++;
		printf(" %2x", value);
		// Break the row once 32 values have been printed on it
		if ((pos % 32) == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print a k x m window of an array of byte buffers to stdout.
 * Row r shows the first m bytes of s[r] as " %2x" values; each row ends
 * with a newline and a blank line follows the last row.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col;

		for (col = 0; col < m; col++)
			printf(" %2x", s[row][col]);
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print a flat byte array to stdout as a matrix of k rows by m columns,
 * reading element (row, col) from s[row * m + col].  Values are written
 * as " %2x"; each row ends with a newline and a blank line follows the
 * last row.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int base = row * m;	// index of the first element of this row
		int col;

		for (col = 0; col < m; col++) {
			unsigned int value = s[base + col] & 0xff;

			printf(" %2x", value);
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j, rtest, srcs;
|
||||||
|
void *buf;
|
||||||
|
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
|
||||||
|
u8 g_tbls[3 * TEST_SOURCES * 32], *dest_ptrs[3], *buffs[TEST_SOURCES];
|
||||||
|
u8 *dest1, *dest2, *dest3, *dest_ref1, *dest_ref2, *dest_ref3;
|
||||||
|
|
||||||
|
int align, size;
|
||||||
|
unsigned char *efence_buffs[TEST_SOURCES];
|
||||||
|
unsigned int offset;
|
||||||
|
u8 *ubuffs[TEST_SOURCES];
|
||||||
|
u8 *udest_ptrs[3];
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) "_test: %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest2 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest3 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref2 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref3 = buf;
|
||||||
|
|
||||||
|
dest_ptrs[0] = dest1;
|
||||||
|
dest_ptrs[1] = dest2;
|
||||||
|
dest_ptrs[2] = dest3;
|
||||||
|
|
||||||
|
// Test of all zeros
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
memset(buffs[i], 0, TEST_LEN);
|
||||||
|
|
||||||
|
memset(dest1, 0, TEST_LEN);
|
||||||
|
memset(dest2, 0, TEST_LEN);
|
||||||
|
memset(dest3, 0, TEST_LEN);
|
||||||
|
memset(dest_ref1, 0, TEST_LEN);
|
||||||
|
memset(dest_ref2, 0, TEST_LEN);
|
||||||
|
memset(dest_ref3, 0, TEST_LEN);
|
||||||
|
memset(g1, 2, TEST_SOURCES);
|
||||||
|
memset(g2, 1, TEST_SOURCES);
|
||||||
|
memset(g3, 7, TEST_SOURCES);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref3);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail zero" xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
|
||||||
|
// Rand data test
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref3);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rand data test with varied parameters
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs,
|
||||||
|
dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs,
|
||||||
|
dest_ref3);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test1 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test2 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test3 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run tests at end of buffer for Electric Fence
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
|
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||||
|
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||||
|
efence_buffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||||
|
efence_buffs, dest_ref3);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test rand ptr alignment if available
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||||
|
srcs = rand() % TEST_SOURCES;
|
||||||
|
if (srcs == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||||
|
// Add random offsets
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
|
||||||
|
udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
|
||||||
|
memset(dest1, 0, TEST_LEN); // zero pad to check write-over
|
||||||
|
memset(dest2, 0, TEST_LEN);
|
||||||
|
memset(dest3, 0, TEST_LEN);
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
ubuffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs);
|
||||||
|
|
||||||
|
if (memcmp(dest_ref1, udest_ptrs[0], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[0], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref2, udest_ptrs[1], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[1], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref3, udest_ptrs[2], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[2], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Confirm that padding around dests is unchanged
|
||||||
|
memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||||
|
offset = udest_ptrs[0] - dest1;
|
||||||
|
|
||||||
|
if (memcmp(dest1, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad1 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad1 end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = udest_ptrs[1] - dest2;
|
||||||
|
if (memcmp(dest2, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad2 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad2 end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = udest_ptrs[2] - dest3;
|
||||||
|
if (memcmp(dest3, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad3 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad3 end\n");;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test all size alignment
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
|
|
||||||
|
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||||
|
srcs = TEST_SOURCES;
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (memcmp(dest_ref1, dest_ptrs[0], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[0], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref2, dest_ptrs[1], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[1], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref3, dest_ptrs[2], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[2], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Pass\n");
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
}
|
259
erasure/src/gf-3vect-dot-prod-sse.asm
Normal file
259
erasure/src/gf-3vect-dot-prod-sse.asm
Normal file
@ -0,0 +1,259 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_3vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
%define arg0 rcx
|
||||||
|
%define arg1 rdx
|
||||||
|
%define arg2 r8
|
||||||
|
%define arg3 r9
|
||||||
|
|
||||||
|
%define arg4 r12 ; must be saved, loaded and restored
|
||||||
|
%define arg5 r15 ; must be saved and restored
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r14 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
%define stack_size 6*16 + 5*8 ; must be an odd multiple of 8
|
||||||
|
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||||
|
|
||||||
|
%define func(x) proc_frame x
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
alloc_stack stack_size
|
||||||
|
save_xmm128 xmm6, 0*16
|
||||||
|
save_xmm128 xmm7, 1*16
|
||||||
|
save_xmm128 xmm8, 2*16
|
||||||
|
save_xmm128 xmm9, 3*16
|
||||||
|
save_xmm128 xmm10, 4*16
|
||||||
|
save_xmm128 xmm11, 5*16
|
||||||
|
save_reg r12, 6*16 + 0*8
|
||||||
|
save_reg r13, 6*16 + 1*8
|
||||||
|
save_reg r14, 6*16 + 2*8
|
||||||
|
save_reg r15, 6*16 + 3*8
|
||||||
|
end_prolog
|
||||||
|
mov arg4, arg(4)
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
movdqa xmm6, [rsp + 0*16]
|
||||||
|
movdqa xmm7, [rsp + 1*16]
|
||||||
|
movdqa xmm8, [rsp + 2*16]
|
||||||
|
movdqa xmm9, [rsp + 3*16]
|
||||||
|
movdqa xmm10, [rsp + 4*16]
|
||||||
|
movdqa xmm11, [rsp + 5*16]
|
||||||
|
mov r12, [rsp + 6*16 + 0*8]
|
||||||
|
mov r13, [rsp + 6*16 + 1*8]
|
||||||
|
mov r14, [rsp + 6*16 + 2*8]
|
||||||
|
mov r15, [rsp + 6*16 + 3*8]
|
||||||
|
add rsp, stack_size
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%define len arg0
|
||||||
|
%define vec arg1
|
||||||
|
%define mul_array arg2
|
||||||
|
%define src arg3
|
||||||
|
%define dest1 arg4
|
||||||
|
%define ptr arg5
|
||||||
|
%define vec_i tmp2
|
||||||
|
%define dest2 tmp3
|
||||||
|
%define dest3 tmp4
|
||||||
|
%define pos return
|
||||||
|
|
||||||
|
%ifndef EC_ALIGNED_ADDR
|
||||||
|
;;; Use Un-aligned load/store
|
||||||
|
%define XLDR movdqu
|
||||||
|
%define XSTR movdqu
|
||||||
|
%else
|
||||||
|
;;; Use aligned load/store (non-temporal unless NO_NT_LDST is defined)
|
||||||
|
%ifdef NO_NT_LDST
|
||||||
|
%define XLDR movdqa
|
||||||
|
%define XSTR movdqa
|
||||||
|
%else
|
||||||
|
%define XLDR movntdqa
|
||||||
|
%define XSTR movntdq
|
||||||
|
%endif
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
default rel
|
||||||
|
|
||||||
|
[bits 64]
|
||||||
|
section .text
|
||||||
|
|
||||||
|
%define xmask0f xmm11
|
||||||
|
%define xgft1_lo xmm10
|
||||||
|
%define xgft1_hi xmm9
|
||||||
|
%define xgft2_lo xmm8
|
||||||
|
%define xgft2_hi xmm7
|
||||||
|
%define xgft3_lo xmm6
|
||||||
|
%define xgft3_hi xmm5
|
||||||
|
|
||||||
|
%define x0 xmm0
|
||||||
|
%define xtmpa xmm1
|
||||||
|
%define xp1 xmm2
|
||||||
|
%define xp2 xmm3
|
||||||
|
%define xp3 xmm4
|
||||||
|
|
||||||
|
align 16
|
||||||
|
global gf_3vect_dot_prod_sse:function
|
||||||
|
;;; gf_3vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; For every 16-byte column of the sources, looks up each source byte in
;;; three sets of nibble-indexed multiply tables and XOR-accumulates the
;;; results into three destination buffers.
;;;   len       - byte count; a value below 16 returns 1 without storing
;;;   vec       - number of source buffers (pointers read from src)
;;;   mul_array - tables, 32 bytes per source (16 low-nibble + 16
;;;               high-nibble); the sets for outputs 2 and 3 start
;;;               32*vec and 64*vec bytes after the first set
;;;   src       - array of source buffer pointers
;;;   dest1     - on entry, array of three destination pointers
;;; Returns (rax): 0 on success, 1 when len < 16.
func(gf_3vect_dot_prod_sse)
|
||||||
|
FUNC_SAVE
|
||||||
|
;; From here on len holds length-16, the last valid 16-byte offset.
sub len, 16
|
||||||
|
jl .return_fail
|
||||||
|
xor pos, pos
|
||||||
|
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||||
|
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||||
|
;; dest1 holds the pointer-array base, so it is dereferenced last.
mov dest2, [dest1+PS]
|
||||||
|
mov dest3, [dest1+2*PS]
|
||||||
|
mov dest1, [dest1]
|
||||||
|
|
||||||
|
|
||||||
|
;; One pass per 16-byte output column: clear the three accumulators
;; and restart at the first source and its tables.
.loop16:
|
||||||
|
pxor xp1, xp1
|
||||||
|
pxor xp2, xp2
|
||||||
|
pxor xp3, xp3
|
||||||
|
mov tmp, mul_array
|
||||||
|
xor vec_i, vec_i
|
||||||
|
|
||||||
|
.next_vect:
|
||||||
|
mov ptr, [src+vec_i]
|
||||||
|
|
||||||
|
;; vec was scaled by PS above, so vec*(32/PS) is 32 bytes per source.
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||||
|
movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||||
|
movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||||
|
movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||||
|
movdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||||
|
movdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||||
|
|
||||||
|
add tmp, 32
|
||||||
|
add vec_i, PS
|
||||||
|
XLDR x0, [ptr+pos] ;Get next source vector
|
||||||
|
|
||||||
|
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||||
|
psraw x0, 4 ;Shift to put high nibble into bits 3-0
|
||||||
|
pand x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||||
|
pand xtmpa, xmask0f ;Mask low src nibble in bits 3-0
|
||||||
|
|
||||||
|
;; pshufb overwrites its table operand, which is why the tables are
;; reloaded for every source.
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||||
|
pxor xp1, xgft1_hi ;xp1 += partial
|
||||||
|
|
||||||
|
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||||
|
pxor xp2, xgft2_hi ;xp2 += partial
|
||||||
|
|
||||||
|
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
pxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||||
|
pxor xp3, xgft3_hi ;xp3 += partial
|
||||||
|
|
||||||
|
cmp vec_i, vec
|
||||||
|
jl .next_vect
|
||||||
|
|
||||||
|
XSTR [dest1+pos], xp1
|
||||||
|
XSTR [dest2+pos], xp2
|
||||||
|
XSTR [dest3+pos], xp3
|
||||||
|
|
||||||
|
add pos, 16 ;Loop on 16 bytes at a time
|
||||||
|
cmp pos, len
|
||||||
|
jle .loop16
|
||||||
|
|
||||||
|
;; tmp = original length. pos equals it only when every byte has been
;; written; otherwise a partial column remains.
lea tmp, [len + 16]
|
||||||
|
cmp pos, tmp
|
||||||
|
je .return_pass
|
||||||
|
|
||||||
|
;; Tail len
|
||||||
|
mov pos, len ;Overlapped offset length-16
|
||||||
|
jmp .loop16 ;Do one more overlap pass
|
||||||
|
|
||||||
|
.return_pass:
|
||||||
|
mov return, 0
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
;; Reached only from the len < 16 check, after FUNC_SAVE has run.
.return_fail:
|
||||||
|
mov return, 1
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
endproc_frame
|
||||||
|
|
||||||
|
section .data
|
||||||
|
|
||||||
|
align 16
|
||||||
|
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||||
|
|
||||||
|
;;; slversion func, core, ver, snum
;;; Emits a 4-byte version record at two global labels, <func>_slver and
;;; <func>_slver_<core><ver><snum>: the 16-bit word 0x<snum>, then the
;;; bytes 0x<ver> and 0x<core>. Arguments are pasted as hex digits.
%macro slversion 4
|
||||||
|
global %1_slver_%2%3%4
|
||||||
|
global %1_slver
|
||||||
|
%1_slver:
|
||||||
|
%1_slver_%2%3%4:
|
||||||
|
dw 0x%4
|
||||||
|
db 0x%3, 0x%2
|
||||||
|
%endmacro
|
||||||
|
;;; func core, ver, snum
|
||||||
|
slversion gf_3vect_dot_prod_sse, 00, 03, 0063
|
296
erasure/src/gf-4vect-dot-prod-avx.asm
Normal file
296
erasure/src/gf-4vect-dot-prod-avx.asm
Normal file
@ -0,0 +1,296 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_4vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define tmp5 r14 ; must be saved and restored
|
||||||
|
%define tmp6 r15 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
%define arg0 rcx
|
||||||
|
%define arg1 rdx
|
||||||
|
%define arg2 r8
|
||||||
|
%define arg3 r9
|
||||||
|
|
||||||
|
%define arg4 r12 ; must be saved, loaded and restored
|
||||||
|
%define arg5 r15 ; must be saved and restored
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r14 ; must be saved and restored
|
||||||
|
%define tmp5 rdi ; must be saved and restored
|
||||||
|
%define tmp6 rsi ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
%define stack_size 9*16 + 7*8 ; must be an odd multiple of 8
|
||||||
|
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||||
|
|
||||||
|
%define func(x) proc_frame x
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
alloc_stack stack_size
|
||||||
|
save_xmm128 xmm6, 0*16
|
||||||
|
save_xmm128 xmm7, 1*16
|
||||||
|
save_xmm128 xmm8, 2*16
|
||||||
|
save_xmm128 xmm9, 3*16
|
||||||
|
save_xmm128 xmm10, 4*16
|
||||||
|
save_xmm128 xmm11, 5*16
|
||||||
|
save_xmm128 xmm12, 6*16
|
||||||
|
save_xmm128 xmm13, 7*16
|
||||||
|
save_xmm128 xmm14, 8*16
|
||||||
|
save_reg r12, 9*16 + 0*8
|
||||||
|
save_reg r13, 9*16 + 1*8
|
||||||
|
save_reg r14, 9*16 + 2*8
|
||||||
|
save_reg r15, 9*16 + 3*8
|
||||||
|
save_reg rdi, 9*16 + 4*8
|
||||||
|
save_reg rsi, 9*16 + 5*8
|
||||||
|
end_prolog
|
||||||
|
mov arg4, arg(4)
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
vmovdqa xmm6, [rsp + 0*16]
|
||||||
|
vmovdqa xmm7, [rsp + 1*16]
|
||||||
|
vmovdqa xmm8, [rsp + 2*16]
|
||||||
|
vmovdqa xmm9, [rsp + 3*16]
|
||||||
|
vmovdqa xmm10, [rsp + 4*16]
|
||||||
|
vmovdqa xmm11, [rsp + 5*16]
|
||||||
|
vmovdqa xmm12, [rsp + 6*16]
|
||||||
|
vmovdqa xmm13, [rsp + 7*16]
|
||||||
|
vmovdqa xmm14, [rsp + 8*16]
|
||||||
|
mov r12, [rsp + 9*16 + 0*8]
|
||||||
|
mov r13, [rsp + 9*16 + 1*8]
|
||||||
|
mov r14, [rsp + 9*16 + 2*8]
|
||||||
|
mov r15, [rsp + 9*16 + 3*8]
|
||||||
|
mov rdi, [rsp + 9*16 + 4*8]
|
||||||
|
mov rsi, [rsp + 9*16 + 5*8]
|
||||||
|
add rsp, stack_size
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
%define len arg0
|
||||||
|
%define vec arg1
|
||||||
|
%define mul_array arg2
|
||||||
|
%define src arg3
|
||||||
|
%define dest1 arg4
|
||||||
|
%define ptr arg5
|
||||||
|
%define vec_i tmp2
|
||||||
|
%define dest2 tmp3
|
||||||
|
%define dest3 tmp4
|
||||||
|
%define dest4 tmp5
|
||||||
|
%define vskip3 tmp6
|
||||||
|
%define pos return
|
||||||
|
|
||||||
|
%ifndef EC_ALIGNED_ADDR
|
||||||
|
;;; Use Un-aligned load/store
|
||||||
|
%define XLDR vmovdqu
|
||||||
|
%define XSTR vmovdqu
|
||||||
|
%else
|
||||||
|
;;; Use aligned load/store (non-temporal unless NO_NT_LDST is defined)
|
||||||
|
%ifdef NO_NT_LDST
|
||||||
|
%define XLDR vmovdqa
|
||||||
|
%define XSTR vmovdqa
|
||||||
|
%else
|
||||||
|
%define XLDR vmovntdqa
|
||||||
|
%define XSTR vmovntdq
|
||||||
|
%endif
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
default rel
|
||||||
|
|
||||||
|
[bits 64]
|
||||||
|
section .text
|
||||||
|
|
||||||
|
%define xmask0f xmm14
|
||||||
|
%define xgft1_lo xmm13
|
||||||
|
%define xgft1_hi xmm12
|
||||||
|
%define xgft2_lo xmm11
|
||||||
|
%define xgft2_hi xmm10
|
||||||
|
%define xgft3_lo xmm9
|
||||||
|
%define xgft3_hi xmm8
|
||||||
|
%define xgft4_lo xmm7
|
||||||
|
%define xgft4_hi xmm6
|
||||||
|
|
||||||
|
|
||||||
|
%define x0 xmm0
|
||||||
|
%define xtmpa xmm1
|
||||||
|
%define xp1 xmm2
|
||||||
|
%define xp2 xmm3
|
||||||
|
%define xp3 xmm4
|
||||||
|
%define xp4 xmm5
|
||||||
|
|
||||||
|
align 16
|
||||||
|
global gf_4vect_dot_prod_avx:function
|
||||||
|
;;; gf_4vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; For every 16-byte column of the sources, looks up each source byte in
;;; four sets of nibble-indexed multiply tables and XOR-accumulates the
;;; results into four destination buffers.
;;;   len       - byte count; a value below 16 returns 1 without storing
;;;   vec       - number of source buffers (pointers read from src)
;;;   mul_array - tables, 32 bytes per source (16 low-nibble + 16
;;;               high-nibble); the sets for outputs 2, 3 and 4 start
;;;               32*vec, 64*vec and 96*vec bytes after the first set
;;;   src       - array of source buffer pointers
;;;   dest1     - on entry, array of four destination pointers
;;; Returns (rax): 0 on success, 1 when len < 16.
func(gf_4vect_dot_prod_avx)
|
||||||
|
FUNC_SAVE
|
||||||
|
;; From here on len holds length-16, the last valid 16-byte offset.
sub len, 16
|
||||||
|
jl .return_fail
|
||||||
|
xor pos, pos
|
||||||
|
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||||
|
;; vskip3 = 96*vec, the byte offset of the fourth table set. It must
;; be computed before vec is scaled by PS below.
mov vskip3, vec
|
||||||
|
imul vskip3, 96
|
||||||
|
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||||
|
;; dest1 holds the pointer-array base, so it is dereferenced last.
mov dest2, [dest1+PS]
|
||||||
|
mov dest3, [dest1+2*PS]
|
||||||
|
mov dest4, [dest1+3*PS]
|
||||||
|
mov dest1, [dest1]
|
||||||
|
|
||||||
|
|
||||||
|
;; One pass per 16-byte output column: clear the four accumulators
;; and restart at the first source and its tables.
.loop16:
|
||||||
|
vpxor xp1, xp1
|
||||||
|
vpxor xp2, xp2
|
||||||
|
vpxor xp3, xp3
|
||||||
|
vpxor xp4, xp4
|
||||||
|
mov tmp, mul_array
|
||||||
|
xor vec_i, vec_i
|
||||||
|
|
||||||
|
.next_vect:
|
||||||
|
mov ptr, [src+vec_i]
|
||||||
|
|
||||||
|
;; vec was scaled by PS above, so vec*(32/PS) is 32 bytes per source.
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||||
|
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||||
|
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||||
|
vmovdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||||
|
vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||||
|
vmovdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||||
|
vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||||
|
vmovdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||||
|
|
||||||
|
XLDR x0, [ptr+pos] ;Get next source vector
|
||||||
|
add tmp, 32
|
||||||
|
add vec_i, PS
|
||||||
|
|
||||||
|
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0
|
||||||
|
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0
|
||||||
|
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||||
|
|
||||||
|
|
||||||
|
;; Each lookup overwrites its table register, which is why the tables
;; are reloaded for every source.
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||||
|
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||||
|
|
||||||
|
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||||
|
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||||
|
|
||||||
|
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||||
|
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||||
|
|
||||||
|
vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||||
|
vpxor xp4, xgft4_hi ;xp4 += partial
|
||||||
|
|
||||||
|
cmp vec_i, vec
|
||||||
|
jl .next_vect
|
||||||
|
|
||||||
|
XSTR [dest1+pos], xp1
|
||||||
|
XSTR [dest2+pos], xp2
|
||||||
|
XSTR [dest3+pos], xp3
|
||||||
|
XSTR [dest4+pos], xp4
|
||||||
|
|
||||||
|
add pos, 16 ;Loop on 16 bytes at a time
|
||||||
|
cmp pos, len
|
||||||
|
jle .loop16
|
||||||
|
|
||||||
|
;; tmp = original length. pos equals it only when every byte has been
;; written; otherwise a partial column remains.
lea tmp, [len + 16]
|
||||||
|
cmp pos, tmp
|
||||||
|
je .return_pass
|
||||||
|
|
||||||
|
;; Tail len
|
||||||
|
mov pos, len ;Overlapped offset length-16
|
||||||
|
jmp .loop16 ;Do one more overlap pass
|
||||||
|
|
||||||
|
.return_pass:
|
||||||
|
mov return, 0
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
;; Reached only from the len < 16 check, after FUNC_SAVE has run.
.return_fail:
|
||||||
|
mov return, 1
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
endproc_frame
|
||||||
|
|
||||||
|
section .data
|
||||||
|
|
||||||
|
align 16
|
||||||
|
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||||
|
|
||||||
|
;;; slversion func, core, ver, snum
;;; Emits a 4-byte version record at two global labels, <func>_slver and
;;; <func>_slver_<core><ver><snum>: the 16-bit word 0x<snum>, then the
;;; bytes 0x<ver> and 0x<core>. Arguments are pasted as hex digits.
%macro slversion 4
|
||||||
|
global %1_slver_%2%3%4
|
||||||
|
global %1_slver
|
||||||
|
%1_slver:
|
||||||
|
%1_slver_%2%3%4:
|
||||||
|
dw 0x%4
|
||||||
|
db 0x%3, 0x%2
|
||||||
|
%endmacro
|
||||||
|
;;; func core, ver, snum
|
||||||
|
slversion gf_4vect_dot_prod_avx, 00, 02, 0064
|
305
erasure/src/gf-4vect-dot-prod-avx2.asm
Normal file
305
erasure/src/gf-4vect-dot-prod-avx2.asm
Normal file
@ -0,0 +1,305 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_4vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp.w r11d
|
||||||
|
%define tmp.b r11b
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define tmp5 r14 ; must be saved and restored
|
||||||
|
%define tmp6 r15 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
%endmacro
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
%define arg0 rcx
|
||||||
|
%define arg1 rdx
|
||||||
|
%define arg2 r8
|
||||||
|
%define arg3 r9
|
||||||
|
|
||||||
|
%define arg4 r12 ; must be saved, loaded and restored
|
||||||
|
%define arg5 r15 ; must be saved and restored
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp.w r11d
|
||||||
|
%define tmp.b r11b
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r14 ; must be saved and restored
|
||||||
|
%define tmp5 rdi ; must be saved and restored
|
||||||
|
%define tmp6 rsi ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
%define stack_size 9*16 + 7*8 ; must be an odd multiple of 8
|
||||||
|
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||||
|
|
||||||
|
%define func(x) proc_frame x
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
alloc_stack stack_size
|
||||||
|
vmovdqa [rsp + 0*16], xmm6
|
||||||
|
vmovdqa [rsp + 1*16], xmm7
|
||||||
|
vmovdqa [rsp + 2*16], xmm8
|
||||||
|
vmovdqa [rsp + 3*16], xmm9
|
||||||
|
vmovdqa [rsp + 4*16], xmm10
|
||||||
|
vmovdqa [rsp + 5*16], xmm11
|
||||||
|
vmovdqa [rsp + 6*16], xmm12
|
||||||
|
vmovdqa [rsp + 7*16], xmm13
|
||||||
|
vmovdqa [rsp + 8*16], xmm14
|
||||||
|
save_reg r12, 9*16 + 0*8
|
||||||
|
save_reg r13, 9*16 + 1*8
|
||||||
|
save_reg r14, 9*16 + 2*8
|
||||||
|
save_reg r15, 9*16 + 3*8
|
||||||
|
save_reg rdi, 9*16 + 4*8
|
||||||
|
save_reg rsi, 9*16 + 5*8
|
||||||
|
end_prolog
|
||||||
|
mov arg4, arg(4)
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
vmovdqa xmm6, [rsp + 0*16]
|
||||||
|
vmovdqa xmm7, [rsp + 1*16]
|
||||||
|
vmovdqa xmm8, [rsp + 2*16]
|
||||||
|
vmovdqa xmm9, [rsp + 3*16]
|
||||||
|
vmovdqa xmm10, [rsp + 4*16]
|
||||||
|
vmovdqa xmm11, [rsp + 5*16]
|
||||||
|
vmovdqa xmm12, [rsp + 6*16]
|
||||||
|
vmovdqa xmm13, [rsp + 7*16]
|
||||||
|
vmovdqa xmm14, [rsp + 8*16]
|
||||||
|
mov r12, [rsp + 9*16 + 0*8]
|
||||||
|
mov r13, [rsp + 9*16 + 1*8]
|
||||||
|
mov r14, [rsp + 9*16 + 2*8]
|
||||||
|
mov r15, [rsp + 9*16 + 3*8]
|
||||||
|
mov rdi, [rsp + 9*16 + 4*8]
|
||||||
|
mov rsi, [rsp + 9*16 + 5*8]
|
||||||
|
add rsp, stack_size
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
%define len arg0
|
||||||
|
%define vec arg1
|
||||||
|
%define mul_array arg2
|
||||||
|
%define src arg3
|
||||||
|
%define dest1 arg4
|
||||||
|
%define ptr arg5
|
||||||
|
%define vec_i tmp2
|
||||||
|
%define dest2 tmp3
|
||||||
|
%define dest3 tmp4
|
||||||
|
%define dest4 tmp5
|
||||||
|
%define vskip3 tmp6
|
||||||
|
%define pos return
|
||||||
|
|
||||||
|
%ifndef EC_ALIGNED_ADDR
|
||||||
|
;;; Use Un-aligned load/store
|
||||||
|
%define XLDR vmovdqu
|
||||||
|
%define XSTR vmovdqu
|
||||||
|
%else
|
||||||
|
;;; Use aligned load/store (non-temporal unless NO_NT_LDST is defined)
|
||||||
|
%ifdef NO_NT_LDST
|
||||||
|
%define XLDR vmovdqa
|
||||||
|
%define XSTR vmovdqa
|
||||||
|
%else
|
||||||
|
%define XLDR vmovntdqa
|
||||||
|
%define XSTR vmovntdq
|
||||||
|
%endif
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
default rel
|
||||||
|
|
||||||
|
[bits 64]
|
||||||
|
section .text
|
||||||
|
|
||||||
|
%define xmask0f ymm14
|
||||||
|
%define xmask0fx xmm14
|
||||||
|
%define xgft1_lo ymm13
|
||||||
|
%define xgft1_hi ymm12
|
||||||
|
%define xgft2_lo ymm11
|
||||||
|
%define xgft2_hi ymm10
|
||||||
|
%define xgft3_lo ymm9
|
||||||
|
%define xgft3_hi ymm8
|
||||||
|
%define xgft4_lo ymm7
|
||||||
|
%define xgft4_hi ymm6
|
||||||
|
|
||||||
|
|
||||||
|
%define x0 ymm0
|
||||||
|
%define xtmpa ymm1
|
||||||
|
%define xp1 ymm2
|
||||||
|
%define xp2 ymm3
|
||||||
|
%define xp3 ymm4
|
||||||
|
%define xp4 ymm5
|
||||||
|
|
||||||
|
align 16
|
||||||
|
global gf_4vect_dot_prod_avx2:function
|
||||||
|
;;; gf_4vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; For every 32-byte column of the sources, looks up each source byte in
;;; four sets of nibble-indexed multiply tables and XOR-accumulates the
;;; results into four destination buffers.
;;;   len       - byte count; a value below 32 returns 1 without storing
;;;   vec       - number of source buffers (pointers read from src)
;;;   mul_array - tables, 32 bytes per source (16 low-nibble + 16
;;;               high-nibble); the sets for outputs 2, 3 and 4 start
;;;               32*vec, 64*vec and 96*vec bytes after the first set
;;;   src       - array of source buffer pointers
;;;   dest1     - on entry, array of four destination pointers
;;; Returns (rax): 0 on success, 1 when len < 32.
;;; NOTE(review): no vzeroupper is issued in the code visible here before
;;; ret - confirm whether callers rely on clean upper YMM state.
func(gf_4vect_dot_prod_avx2)
|
||||||
|
FUNC_SAVE
|
||||||
|
;; From here on len holds length-32, the last valid 32-byte offset.
sub len, 32
|
||||||
|
jl .return_fail
|
||||||
|
xor pos, pos
|
||||||
|
;; Build the nibble mask in a register: 0x0f is inserted into byte 0
;; and then broadcast to every byte of the ymm register.
mov tmp.b, 0x0f
|
||||||
|
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
|
||||||
|
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||||
|
;; vskip3 = 96*vec, the byte offset of the fourth table set. It must
;; be computed before vec is scaled by PS below.
mov vskip3, vec
|
||||||
|
imul vskip3, 96
|
||||||
|
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||||
|
;; dest1 holds the pointer-array base, so it is dereferenced last.
mov dest2, [dest1+PS]
|
||||||
|
mov dest3, [dest1+2*PS]
|
||||||
|
mov dest4, [dest1+3*PS]
|
||||||
|
mov dest1, [dest1]
|
||||||
|
|
||||||
|
|
||||||
|
;; One pass per 32-byte output column: clear the four accumulators
;; and restart at the first source and its tables.
.loop32:
|
||||||
|
vpxor xp1, xp1
|
||||||
|
vpxor xp2, xp2
|
||||||
|
vpxor xp3, xp3
|
||||||
|
vpxor xp4, xp4
|
||||||
|
mov tmp, mul_array
|
||||||
|
xor vec_i, vec_i
|
||||||
|
|
||||||
|
.next_vect:
|
||||||
|
mov ptr, [src+vec_i]
|
||||||
|
XLDR x0, [ptr+pos] ;Get next source vector
|
||||||
|
add vec_i, PS
|
||||||
|
|
||||||
|
;; xgft4_lo is used as scratch for the low nibbles here; it is
;; reloaded with table data further down.
vpand xgft4_lo, x0, xmask0f ;Mask low src nibble in bits 3-0
|
||||||
|
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0
|
||||||
|
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||||
|
;; Mix the lanes so each index register lines up with a table
;; register: a 32-byte table load holds lo-table | hi-table, and its
;; lane-swapped copy holds hi-table | lo-table.
vperm2i128 xtmpa, xgft4_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
|
||||||
|
vperm2i128 x0, xgft4_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
|
||||||
|
|
||||||
|
;; vec was scaled by PS above, so vec*(32/PS) is 32 bytes per source.
;; Each 32-byte load brings in both the low- and high-nibble tables.
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||||
|
; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||||
|
vmovdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||||
|
; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||||
|
vmovdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||||
|
; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||||
|
vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||||
|
; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||||
|
|
||||||
|
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||||
|
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
|
||||||
|
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
|
||||||
|
vperm2i128 xgft4_hi, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo
|
||||||
|
add tmp, 32
|
||||||
|
|
||||||
|
;; After the lane mixing above, each pair of lookups covers both
;; nibbles of all 32 bytes; XORing the pair gives the per-byte product.
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||||
|
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||||
|
|
||||||
|
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||||
|
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||||
|
|
||||||
|
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||||
|
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||||
|
|
||||||
|
vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||||
|
vpxor xp4, xgft4_hi ;xp4 += partial
|
||||||
|
|
||||||
|
cmp vec_i, vec
|
||||||
|
jl .next_vect
|
||||||
|
|
||||||
|
XSTR [dest1+pos], xp1
|
||||||
|
XSTR [dest2+pos], xp2
|
||||||
|
XSTR [dest3+pos], xp3
|
||||||
|
XSTR [dest4+pos], xp4
|
||||||
|
|
||||||
|
add pos, 32 ;Loop on 32 bytes at a time
|
||||||
|
cmp pos, len
|
||||||
|
jle .loop32
|
||||||
|
|
||||||
|
;; tmp = original length. pos equals it only when every byte has been
;; written; otherwise a partial column remains.
lea tmp, [len + 32]
|
||||||
|
cmp pos, tmp
|
||||||
|
je .return_pass
|
||||||
|
|
||||||
|
;; Tail len
|
||||||
|
mov pos, len ;Overlapped offset length-32
|
||||||
|
jmp .loop32 ;Do one more overlap pass
|
||||||
|
|
||||||
|
.return_pass:
|
||||||
|
mov return, 0
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
;; Reached only from the len < 32 check, after FUNC_SAVE has run.
.return_fail:
|
||||||
|
mov return, 1
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
endproc_frame
|
||||||
|
|
||||||
|
section .data
|
||||||
|
|
||||||
|
;;; slversion func, core, ver, snum
;;; Emits a 4-byte version record at two global labels, <func>_slver and
;;; <func>_slver_<core><ver><snum>: the 16-bit word 0x<snum>, then the
;;; bytes 0x<ver> and 0x<core>. Arguments are pasted as hex digits.
%macro slversion 4
|
||||||
|
global %1_slver_%2%3%4
|
||||||
|
global %1_slver
|
||||||
|
%1_slver:
|
||||||
|
%1_slver_%2%3%4:
|
||||||
|
dw 0x%4
|
||||||
|
db 0x%3, 0x%2
|
||||||
|
%endmacro
|
||||||
|
;;; func core, ver, snum
|
||||||
|
slversion gf_4vect_dot_prod_avx2, 04, 03, 0064
|
281
erasure/src/gf-4vect-dot-prod-sse-perf.c
Normal file
281
erasure/src/gf-4vect-dot-prod-sse-perf.c
Normal file
@ -0,0 +1,281 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/tests.h"
|
||||||
|
|
||||||
|
// Routine exercised by this benchmark.  Override on the compiler command
// line (-DFUNCTION_UNDER_TEST=...) to measure a different implementation.
#ifndef FUNCTION_UNDER_TEST
# define FUNCTION_UNDER_TEST gf_4vect_dot_prod_sse
#endif

// Two-step stringification: xstr() lets its argument be macro-expanded
// before str() turns it into a string literal.
#define str(s) #s
#define xstr(s) str(s)

// Benchmark geometry.  Every arithmetic replacement list is parenthesized
// so the macros stay correct inside any surrounding expression.
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES 10
# define TEST_LEN (8*1024)
# define TEST_LOOPS 40000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES 10
#  define GT_L3_CACHE (32*1024*1024)	/* some number > last level cache */
#  define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
#  define TEST_LOOPS 100
#  define TEST_TYPE_STR "_cold"
# else
// Custom test: TEST_SOURCES and TEST_LEN are not defined here, so they must
// be supplied by the build; only TEST_LOOPS has a default.
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#   define TEST_LOOPS 1000
#  endif
# endif
#endif

typedef unsigned char u8;
|
||||||
|
|
||||||
|
/*
 * Print the first len bytes of buf to stdout as hex values, starting a new
 * output line after every 32 bytes and ending with a newline.
 */
void dump(unsigned char *buf, int len)
{
	int printed = 0;

	while (printed < len) {
		printf(" %2x", 0xff & buf[printed]);
		printed++;
		// Break the row once a multiple of 32 bytes has been written
		if (printed % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print a k x m table of hex bytes to stdout: one output line per row
 * pointer s[0..k-1], showing the first m bytes of each, followed by a
 * blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	for (int row = 0; row < k; row++) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
void *buf;
|
||||||
|
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
|
||||||
|
u8 g4[TEST_SOURCES], g_tbls[4 * TEST_SOURCES * 32], *buffs[TEST_SOURCES];
|
||||||
|
u8 *dest1, *dest2, *dest3, *dest4, *dest_ref1, *dest_ref2, *dest_ref3;
|
||||||
|
u8 *dest_ref4, *dest_ptrs[4];
|
||||||
|
struct perf start, stop;
|
||||||
|
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest2 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest3 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest4 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref2 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref3 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref4 = buf;
|
||||||
|
|
||||||
|
dest_ptrs[0] = dest1;
|
||||||
|
dest_ptrs[1] = dest2;
|
||||||
|
dest_ptrs[2] = dest3;
|
||||||
|
dest_ptrs[3] = dest4;
|
||||||
|
|
||||||
|
// Performance test
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
memset(dest1, 0, TEST_LEN);
|
||||||
|
memset(dest2, 0, TEST_LEN);
|
||||||
|
memset(dest3, 0, TEST_LEN);
|
||||||
|
memset(dest4, 0, TEST_LEN);
|
||||||
|
memset(dest_ref1, 0, TEST_LEN);
|
||||||
|
memset(dest_ref2, 0, TEST_LEN);
|
||||||
|
memset(dest_ref3, 0, TEST_LEN);
|
||||||
|
memset(dest_ref4, 0, TEST_LEN);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
g4[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++) {
|
||||||
|
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||||
|
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref4);
|
||||||
|
|
||||||
|
#ifdef DO_REF_PERF
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS / 100; i++) {
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++) {
|
||||||
|
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||||
|
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref4);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf("gf_4vect_dot_prod_base" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 4) * i);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS; i++) {
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++) {
|
||||||
|
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||||
|
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 4) * i);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test3\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test3\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest4, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("pass perf check\n");
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
}
|
692
erasure/src/gf-4vect-dot-prod-sse-test.c
Normal file
692
erasure/src/gf-4vect-dot-prod-sse-test.c
Normal file
@ -0,0 +1,692 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/types.h"
|
||||||
|
|
||||||
|
// Routine checked by this unit test and the smallest length it is run with.
// Both can be overridden on the compiler command line.
#if !defined(FUNCTION_UNDER_TEST)
# define FUNCTION_UNDER_TEST gf_4vect_dot_prod_sse
#endif
#if !defined(TEST_MIN_SIZE)
# define TEST_MIN_SIZE 16
#endif

// xstr() expands its argument before str() stringifies it
#define str(s) #s
#define xstr(s) str(s)

// Fixed buffer geometry
#define TEST_LEN 8192
#define TEST_SIZE (TEST_LEN/2)
#define TEST_MEM TEST_SIZE
#define TEST_LOOPS 10000
#define TEST_TYPE_STR ""

// Overridable source count and number of random iterations
#if !defined(TEST_SOURCES)
# define TEST_SOURCES 16
#endif
#if !defined(RANDOMS)
# define RANDOMS 20
#endif

// Define power of 2 range to check ptr, len alignment
#if defined(EC_ALIGNED_ADDR)
# define PTR_ALIGN_CHK_B 0
# define LEN_ALIGN_CHK_B 0	// 0 for aligned only
#else
# define PTR_ALIGN_CHK_B 32
# define LEN_ALIGN_CHK_B 32	// 0 for aligned only
#endif

typedef unsigned char u8;
|
||||||
|
|
||||||
|
/*
 * Print the first len bytes of buf to stdout as hex values, starting a new
 * output line after every 32 bytes and ending with a newline.
 */
void dump(unsigned char *buf, int len)
{
	int printed = 0;

	while (printed < len) {
		printf(" %2x", 0xff & buf[printed]);
		printed++;
		// Break the row once a multiple of 32 bytes has been written
		if (printed % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print a k x m table of hex bytes to stdout: one output line per row
 * pointer s[0..k-1], showing the first m bytes of each, followed by a
 * blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	for (int row = 0; row < k; row++) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print a flat byte array as a k x m table of hex values: row r shows the
 * m bytes starting at index r * m.  A blank line follows the table.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	for (int row = 0; row < k; row++) {
		int col = 0;

		while (col < m) {
			printf(" %2x", 0xff & s[col + (row * m)]);
			col++;
		}
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j, rtest, srcs;
|
||||||
|
void *buf;
|
||||||
|
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
|
||||||
|
u8 g4[TEST_SOURCES], g_tbls[4 * TEST_SOURCES * 32], *buffs[TEST_SOURCES];
|
||||||
|
u8 *dest1, *dest2, *dest3, *dest4, *dest_ref1, *dest_ref2, *dest_ref3;
|
||||||
|
u8 *dest_ref4, *dest_ptrs[4];
|
||||||
|
|
||||||
|
int align, size;
|
||||||
|
unsigned char *efence_buffs[TEST_SOURCES];
|
||||||
|
unsigned int offset;
|
||||||
|
u8 *ubuffs[TEST_SOURCES];
|
||||||
|
u8 *udest_ptrs[4];
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest2 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest3 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest4 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref2 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref3 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref4 = buf;
|
||||||
|
|
||||||
|
dest_ptrs[0] = dest1;
|
||||||
|
dest_ptrs[1] = dest2;
|
||||||
|
dest_ptrs[2] = dest3;
|
||||||
|
dest_ptrs[3] = dest4;
|
||||||
|
|
||||||
|
// Test of all zeros
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
memset(buffs[i], 0, TEST_LEN);
|
||||||
|
|
||||||
|
memset(dest1, 0, TEST_LEN);
|
||||||
|
memset(dest2, 0, TEST_LEN);
|
||||||
|
memset(dest3, 0, TEST_LEN);
|
||||||
|
memset(dest4, 0, TEST_LEN);
|
||||||
|
memset(dest_ref1, 0, TEST_LEN);
|
||||||
|
memset(dest_ref2, 0, TEST_LEN);
|
||||||
|
memset(dest_ref3, 0, TEST_LEN);
|
||||||
|
memset(dest_ref4, 0, TEST_LEN);
|
||||||
|
memset(g1, 2, TEST_SOURCES);
|
||||||
|
memset(g2, 1, TEST_SOURCES);
|
||||||
|
memset(g3, 7, TEST_SOURCES);
|
||||||
|
memset(g4, 3, TEST_SOURCES);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]);
|
||||||
|
gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref4);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest4, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
|
||||||
|
// Rand data test
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
g4[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref4);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest4, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rand data test with varied parameters
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
g4[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs,
|
||||||
|
dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs,
|
||||||
|
dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs,
|
||||||
|
dest_ref4);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test1 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test2 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test3 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test4 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest4, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run tests at end of buffer for Electric Fence
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 32;
|
||||||
|
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||||
|
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
g4[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||||
|
efence_buffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||||
|
efence_buffs, dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
|
||||||
|
efence_buffs, dest_ref4);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref4, dest4, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest4, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test rand ptr alignment if available
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||||
|
srcs = rand() % TEST_SOURCES;
|
||||||
|
if (srcs == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||||
|
// Add random offsets
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
|
||||||
|
udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
|
||||||
|
memset(dest1, 0, TEST_LEN); // zero pad to check write-over
|
||||||
|
memset(dest2, 0, TEST_LEN);
|
||||||
|
memset(dest3, 0, TEST_LEN);
|
||||||
|
memset(dest4, 0, TEST_LEN);
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
ubuffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
g4[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs);
|
||||||
|
|
||||||
|
if (memcmp(dest_ref1, udest_ptrs[0], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[0], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref2, udest_ptrs[1], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[1], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref3, udest_ptrs[2], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[2], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref4, udest_ptrs[3], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[3], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Confirm that padding around dests is unchanged
|
||||||
|
memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||||
|
offset = udest_ptrs[0] - dest1;
|
||||||
|
|
||||||
|
if (memcmp(dest1, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad1 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad1 end\n");
|
||||||
|
printf("size=%d offset=%d srcs=%d\n", size, offset, srcs);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = udest_ptrs[1] - dest2;
|
||||||
|
if (memcmp(dest2, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad2 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad2 end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = udest_ptrs[2] - dest3;
|
||||||
|
if (memcmp(dest3, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad3 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad3 end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = udest_ptrs[3] - dest4;
|
||||||
|
if (memcmp(dest4, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad4 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad4 end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test all size alignment
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 32;
|
||||||
|
|
||||||
|
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||||
|
srcs = TEST_SOURCES;
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
g4[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (memcmp(dest_ref1, dest_ptrs[0], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[0], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref2, dest_ptrs[1], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[1], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref3, dest_ptrs[2], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[2], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref4, dest_ptrs[3], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[3], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Pass\n");
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
}
|
296
erasure/src/gf-4vect-dot-prod-sse.asm
Normal file
296
erasure/src/gf-4vect-dot-prod-sse.asm
Normal file
@ -0,0 +1,296 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_4vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
;;; ELF64 build: register assignments and prologue/epilogue macros.
;;; Arguments 0-5 are read from rdi, rsi, rdx, rcx, r8 and r9.
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
;;; Scratch registers. r12-r15 are pushed by FUNC_SAVE and popped by
;;; FUNC_RESTORE below; r10, r11 and rax are used without being saved.
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define tmp5 r14 ; must be saved and restored
|
||||||
|
%define tmp6 r15 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
;;; PS is the size in bytes of one pointer-array entry and LOG_PS is log2(PS);
;;; the function uses LOG_PS as a shift count to turn the vector count into a
;;; byte offset into the pointer arrays.
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
;;; func(name) simply emits the label "name:" in this build.
%define func(x) x:
|
||||||
|
;;; Prologue: push the four scratch registers that must be preserved.
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
%endmacro
|
||||||
|
;;; Epilogue: pop the same registers in reverse order of FUNC_SAVE.
%macro FUNC_RESTORE 0
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
;;; win64 build: register assignments, stack frame layout and
;;; prologue/epilogue macros.
;;; Arguments 0-3 are read from rcx, rdx, r8 and r9. Argument 4 is not in a
;;; register on entry; FUNC_SAVE loads it from the stack into r12.
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
%define arg0 rcx
|
||||||
|
%define arg1 rdx
|
||||||
|
%define arg2 r8
|
||||||
|
%define arg3 r9
|
||||||
|
|
||||||
|
%define arg4 r12 ; must be saved, loaded and restored
|
||||||
|
%define arg5 r15 ; must be saved and restored
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r14 ; must be saved and restored
|
||||||
|
%define tmp5 rdi ; must be saved and restored
|
||||||
|
%define tmp6 rsi ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
;;; Frame layout: nine 16-byte slots for xmm6-xmm14 at [rsp + 0 .. 9*16),
;;; followed by seven 8-byte slots of which six hold r12-r15, rdi and rsi.
;;; The seventh 8-byte slot is never written.
;;; NOTE(review): presumably it exists only to make the size an odd multiple of
;;; 8 so that rsp is 16-byte aligned for the movdqa accesses - confirm.
%define stack_size 9*16 + 7*8 ; must be an odd multiple of 8
|
||||||
|
;;; arg(x) addresses stack slot x above this frame: it skips the local frame
;;; (stack_size) plus one further PS-sized slot
;;; NOTE(review): that extra slot is presumably the return address - confirm.
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||||
|
|
||||||
|
;;; func(name) opens a proc_frame; the matching endproc_frame follows the
;;; function body.
;;; NOTE(review): proc_frame, alloc_stack, save_xmm128, save_reg and end_prolog
;;; are not defined in this file - presumably assembler-provided win64 macros.
%define func(x) proc_frame x
|
||||||
|
;;; Prologue: allocate the frame, spill xmm6-xmm14 and the six general
;;; registers listed above, then load argument 4 from the caller's stack.
%macro FUNC_SAVE 0
|
||||||
|
alloc_stack stack_size
|
||||||
|
save_xmm128 xmm6, 0*16
|
||||||
|
save_xmm128 xmm7, 1*16
|
||||||
|
save_xmm128 xmm8, 2*16
|
||||||
|
save_xmm128 xmm9, 3*16
|
||||||
|
save_xmm128 xmm10, 4*16
|
||||||
|
save_xmm128 xmm11, 5*16
|
||||||
|
save_xmm128 xmm12, 6*16
|
||||||
|
save_xmm128 xmm13, 7*16
|
||||||
|
save_xmm128 xmm14, 8*16
|
||||||
|
save_reg r12, 9*16 + 0*8
|
||||||
|
save_reg r13, 9*16 + 1*8
|
||||||
|
save_reg r14, 9*16 + 2*8
|
||||||
|
save_reg r15, 9*16 + 3*8
|
||||||
|
save_reg rdi, 9*16 + 4*8
|
||||||
|
save_reg rsi, 9*16 + 5*8
|
||||||
|
end_prolog
|
||||||
|
mov arg4, arg(4)
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
;;; Epilogue: reload every register spilled by FUNC_SAVE from the same frame
;;; offsets, then release the frame.
%macro FUNC_RESTORE 0
|
||||||
|
movdqa xmm6, [rsp + 0*16]
|
||||||
|
movdqa xmm7, [rsp + 1*16]
|
||||||
|
movdqa xmm8, [rsp + 2*16]
|
||||||
|
movdqa xmm9, [rsp + 3*16]
|
||||||
|
movdqa xmm10, [rsp + 4*16]
|
||||||
|
movdqa xmm11, [rsp + 5*16]
|
||||||
|
movdqa xmm12, [rsp + 6*16]
|
||||||
|
movdqa xmm13, [rsp + 7*16]
|
||||||
|
movdqa xmm14, [rsp + 8*16]
|
||||||
|
mov r12, [rsp + 9*16 + 0*8]
|
||||||
|
mov r13, [rsp + 9*16 + 1*8]
|
||||||
|
mov r14, [rsp + 9*16 + 2*8]
|
||||||
|
mov r15, [rsp + 9*16 + 3*8]
|
||||||
|
mov rdi, [rsp + 9*16 + 4*8]
|
||||||
|
mov rsi, [rsp + 9*16 + 5*8]
|
||||||
|
add rsp, stack_size
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
;;; Symbolic names for the registers used by gf_4vect_dot_prod_sse.
;;;   len       - byte count on entry; the function subtracts 16 from it
;;;   vec       - number of sources on entry; later scaled by PS
;;;   mul_array - base address of the lookup tables
;;;   src       - array of source buffer pointers
;;;   dest1     - array of destination pointers on entry, then destination 1
;;;   ptr       - pointer to the source buffer currently being read
;;;   vec_i     - byte offset into the src pointer array
;;;   dest2..4  - destinations 2-4
;;;   vskip3    - byte offset from the first table set to the fourth
;;;   pos       - byte offset into the buffers; shares rax with the return value
%define len arg0
|
||||||
|
%define vec arg1
|
||||||
|
%define mul_array arg2
|
||||||
|
%define src arg3
|
||||||
|
%define dest1 arg4
|
||||||
|
%define ptr arg5
|
||||||
|
%define vec_i tmp2
|
||||||
|
%define dest2 tmp3
|
||||||
|
%define dest3 tmp4
|
||||||
|
%define dest4 tmp5
|
||||||
|
%define vskip3 tmp6
|
||||||
|
%define pos return
|
||||||
|
|
||||||
|
;;; Select the instructions used for source loads (XLDR) and result stores (XSTR):
;;;   EC_ALIGNED_ADDR not defined             -> movdqu   / movdqu
;;;   EC_ALIGNED_ADDR and NO_NT_LDST defined  -> movdqa   / movdqa
;;;   EC_ALIGNED_ADDR defined, NO_NT_LDST not -> movntdqa / movntdq
%ifndef EC_ALIGNED_ADDR
|
||||||
|
;;; Use unaligned load/store
|
||||||
|
%define XLDR movdqu
|
||||||
|
%define XSTR movdqu
|
||||||
|
%else
|
||||||
|
;;; Use aligned load/store; non-temporal unless NO_NT_LDST is defined
|
||||||
|
%ifdef NO_NT_LDST
|
||||||
|
%define XLDR movdqa
|
||||||
|
%define XSTR movdqa
|
||||||
|
%else
|
||||||
|
%define XLDR movntdqa
|
||||||
|
%define XSTR movntdq
|
||||||
|
%endif
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
default rel
|
||||||
|
|
||||||
|
[bits 64]
|
||||||
|
section .text
|
||||||
|
|
||||||
|
;;; xmm register roles in gf_4vect_dot_prod_sse.
;;;   xmask0f      - the mask0f constant (0x0f in every byte)
;;;   xgftN_lo/_hi - the two 16-byte halves of the table for output N and the
;;;                  current source; overwritten by the pshufb lookups
%define xmask0f xmm14
|
||||||
|
%define xgft1_lo xmm13
|
||||||
|
%define xgft1_hi xmm12
|
||||||
|
%define xgft2_lo xmm11
|
||||||
|
%define xgft2_hi xmm10
|
||||||
|
%define xgft3_lo xmm9
|
||||||
|
%define xgft3_hi xmm8
|
||||||
|
%define xgft4_lo xmm7
|
||||||
|
%define xgft4_hi xmm6
|
||||||
|
|
||||||
|
|
||||||
|
;;;   x0    - 16 source bytes, then their high nibbles
;;;   xtmpa - low nibbles of the same 16 source bytes
;;;   xp1-4 - running XOR accumulators for outputs 1-4
%define x0 xmm0
|
||||||
|
%define xtmpa xmm1
|
||||||
|
%define xp1 xmm2
|
||||||
|
%define xp2 xmm3
|
||||||
|
%define xp3 xmm4
|
||||||
|
%define xp4 xmm5
|
||||||
|
|
||||||
|
;;; gf_4vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; Processes the inputs 16 bytes at a time. For each 16-byte position every
;;; source is split into low and high nibbles, each nibble vector indexes a
;;; 16-byte table half with pshufb, and the results are XORed into four
;;; accumulators (one per output) which are then stored to the four
;;; destination buffers.
;;;
;;;   len       - number of bytes to process; values below 16 are rejected
;;;   vec       - number of sources. The inner loop body runs once before the
;;;               count is tested, so vec >= 1 is assumed - confirm with callers.
;;;   mul_array - tables, 32 bytes per source; the set for output k (0-based)
;;;               starts at byte offset k*32*vec
;;;   src       - array of vec pointers to the source buffers
;;;   dest1     - on entry, array of four destination pointers
;;;
;;; Returns 0 in rax on success, or 1 if len < 16 (nothing is stored).
;;; If len is not a multiple of 16, one extra pass is made at offset len-16,
;;; recomputing bytes already written by the previous pass.
;;; NOTE(review): that extra pass uses an offset that is 16-byte aligned only
;;; when len is a multiple of 16; with EC_ALIGNED_ADDR defined XLDR/XSTR are
;;; aligned-only instructions - confirm callers pass a multiple of 16 then.
align 16
|
||||||
|
global gf_4vect_dot_prod_sse:function
|
||||||
|
func(gf_4vect_dot_prod_sse)
|
||||||
|
FUNC_SAVE
|
||||||
|
sub len, 16 ;len = offset of the last full 16-byte block
|
||||||
|
jl .return_fail ;Fewer than 16 bytes requested
|
||||||
|
xor pos, pos
|
||||||
|
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||||
|
mov vskip3, vec
|
||||||
|
imul vskip3, 96 ;vskip3 = 3*32*vec, offset of the fourth table set
|
||||||
|
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||||
|
mov dest2, [dest1+PS]
|
||||||
|
mov dest3, [dest1+2*PS]
|
||||||
|
mov dest4, [dest1+3*PS]
|
||||||
|
mov dest1, [dest1] ;Loaded last: overwrites the pointer-array register
|
||||||
|
|
||||||
|
|
||||||
|
;;; Outer loop: one iteration per 16-byte position pos.
.loop16:
|
||||||
|
pxor xp1, xp1 ;Clear the four accumulators
|
||||||
|
pxor xp2, xp2
|
||||||
|
pxor xp3, xp3
|
||||||
|
pxor xp4, xp4
|
||||||
|
mov tmp, mul_array ;tmp walks the tables, 32 bytes per source
|
||||||
|
xor vec_i, vec_i
|
||||||
|
|
||||||
|
;;; Inner loop: one iteration per source buffer.
.next_vect:
|
||||||
|
mov ptr, [src+vec_i]
|
||||||
|
|
||||||
|
;; vec holds PS*count here, so vec*(32/PS) is 32*count and vec*(64/PS) is 64*count.
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||||
|
movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||||
|
movdqu xgft2_lo, [tmp+vec*(32/PS)] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||||
|
movdqu xgft2_hi, [tmp+vec*(32/PS)+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||||
|
movdqu xgft3_lo, [tmp+vec*(64/PS)] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||||
|
movdqu xgft3_hi, [tmp+vec*(64/PS)+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||||
|
movdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||||
|
movdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||||
|
|
||||||
|
XLDR x0, [ptr+pos] ;Get next source vector
|
||||||
|
add tmp, 32 ;Advance to the next source's table
|
||||||
|
add vec_i, PS
|
||||||
|
|
||||||
|
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||||
|
psraw x0, 4 ;Shift to put high nibble into bits 3-0
|
||||||
|
pand x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||||
|
pand xtmpa, xmask0f ;Mask low src nibble in bits 3-0
|
||||||
|
|
||||||
|
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||||
|
pxor xp1, xgft1_hi ;xp1 += partial
|
||||||
|
|
||||||
|
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||||
|
pxor xp2, xgft2_hi ;xp2 += partial
|
||||||
|
|
||||||
|
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
pxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||||
|
pxor xp3, xgft3_hi ;xp3 += partial
|
||||||
|
|
||||||
|
pshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
pxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||||
|
pxor xp4, xgft4_hi ;xp4 += partial
|
||||||
|
|
||||||
|
cmp vec_i, vec
|
||||||
|
jl .next_vect
|
||||||
|
|
||||||
|
XSTR [dest1+pos], xp1
|
||||||
|
XSTR [dest2+pos], xp2
|
||||||
|
XSTR [dest3+pos], xp3
|
||||||
|
XSTR [dest4+pos], xp4
|
||||||
|
|
||||||
|
add pos, 16 ;Loop on 16 bytes at a time
|
||||||
|
cmp pos, len
|
||||||
|
jle .loop16
|
||||||
|
|
||||||
|
lea tmp, [len + 16] ;tmp = the original byte count
|
||||||
|
cmp pos, tmp
|
||||||
|
je .return_pass ;Every byte has been written
|
||||||
|
|
||||||
|
;; Tail len
|
||||||
|
mov pos, len ;Overlapped offset length-16
|
||||||
|
jmp .loop16 ;Do one more overlap pass
|
||||||
|
|
||||||
|
.return_pass:
|
||||||
|
mov return, 0
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
.return_fail:
|
||||||
|
mov return, 1
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
;;; NOTE(review): this line is outside the win64 %ifidn block and nothing in
;;; this file defines endproc_frame for ELF64 - confirm how ELF builds treat it.
endproc_frame
|
||||||
|
|
||||||
|
section .data
|
||||||
|
|
||||||
|
;;; 16 bytes of 0x0f, used to isolate one nibble of every byte. The function
;;; loads it with movdqa, so the 16-byte alignment must be kept.
align 16
|
||||||
|
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||||
|
|
||||||
|
;;; slversion func, core, ver, snum
;;; Exports two global labels at the same address, <func>_slver and
;;; <func>_slver_<core><ver><snum>, and emits four bytes of data there:
;;; the word 0x<snum> followed by the bytes 0x<ver> and 0x<core>.
;;; The three numeric arguments are pasted after "0x", so they are read as hex.
%macro slversion 4
|
||||||
|
global %1_slver_%2%3%4
|
||||||
|
global %1_slver
|
||||||
|
%1_slver:
|
||||||
|
%1_slver_%2%3%4:
|
||||||
|
dw 0x%4
|
||||||
|
db 0x%3, 0x%2
|
||||||
|
%endmacro
|
||||||
|
;;; func core, ver, snum
|
||||||
|
slversion gf_4vect_dot_prod_sse, 00, 03, 0064
|
311
erasure/src/gf-5vect-dot-prod-avx.asm
Normal file
311
erasure/src/gf-5vect-dot-prod-avx.asm
Normal file
@ -0,0 +1,311 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_5vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
;;; ELF64 build: register assignments and prologue/epilogue macros.
;;; Arguments 0-5 are read from rdi, rsi, rdx, rcx, r8 and r9.
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
;;; Scratch registers. r12-r15 are pushed by FUNC_SAVE and popped by
;;; FUNC_RESTORE below; r10, r11 and rax are used without being saved.
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define tmp5 r14 ; must be saved and restored
|
||||||
|
%define tmp6 r15 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
;;; PS is the size in bytes of one pointer-array entry and LOG_PS is log2(PS);
;;; the function uses LOG_PS as a shift count to turn the vector count into a
;;; byte offset into the pointer arrays.
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
;;; func(name) simply emits the label "name:" in this build.
%define func(x) x:
|
||||||
|
;;; Prologue: push the four scratch registers that must be preserved.
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
%endmacro
|
||||||
|
;;; Epilogue: pop the same registers in reverse order of FUNC_SAVE.
%macro FUNC_RESTORE 0
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
;;; win64 build: register assignments, stack frame layout and
;;; prologue/epilogue macros.
;;; Arguments 0-3 are read from rcx, rdx, r8 and r9. Argument 4 is not in a
;;; register on entry; FUNC_SAVE loads it from the stack into r12.
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
%define arg0 rcx
|
||||||
|
%define arg1 rdx
|
||||||
|
%define arg2 r8
|
||||||
|
%define arg3 r9
|
||||||
|
|
||||||
|
%define arg4 r12 ; must be saved, loaded and restored
|
||||||
|
%define arg5 r15 ; must be saved and restored
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r14 ; must be saved and restored
|
||||||
|
%define tmp5 rdi ; must be saved and restored
|
||||||
|
%define tmp6 rsi ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
;;; Frame layout: ten 16-byte slots for xmm6-xmm15 at [rsp + 0 .. 10*16),
;;; followed by seven 8-byte slots of which six hold r12-r15, rdi and rsi.
;;; The seventh 8-byte slot is never written.
;;; NOTE(review): presumably it exists only to make the size an odd multiple of
;;; 8 so that rsp is 16-byte aligned for the vmovdqa accesses - confirm.
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
|
||||||
|
;;; arg(x) addresses stack slot x above this frame: it skips the local frame
;;; (stack_size) plus one further PS-sized slot
;;; NOTE(review): that extra slot is presumably the return address - confirm.
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||||
|
|
||||||
|
;;; func(name) opens a proc_frame; the matching endproc_frame follows the
;;; function body.
;;; NOTE(review): proc_frame, alloc_stack, save_xmm128, save_reg and end_prolog
;;; are not defined in this file - presumably assembler-provided win64 macros.
%define func(x) proc_frame x
|
||||||
|
;;; Prologue: allocate the frame, spill xmm6-xmm15 and the six general
;;; registers listed above, then load argument 4 from the caller's stack.
%macro FUNC_SAVE 0
|
||||||
|
alloc_stack stack_size
|
||||||
|
save_xmm128 xmm6, 0*16
|
||||||
|
save_xmm128 xmm7, 1*16
|
||||||
|
save_xmm128 xmm8, 2*16
|
||||||
|
save_xmm128 xmm9, 3*16
|
||||||
|
save_xmm128 xmm10, 4*16
|
||||||
|
save_xmm128 xmm11, 5*16
|
||||||
|
save_xmm128 xmm12, 6*16
|
||||||
|
save_xmm128 xmm13, 7*16
|
||||||
|
save_xmm128 xmm14, 8*16
|
||||||
|
save_xmm128 xmm15, 9*16
|
||||||
|
save_reg r12, 10*16 + 0*8
|
||||||
|
save_reg r13, 10*16 + 1*8
|
||||||
|
save_reg r14, 10*16 + 2*8
|
||||||
|
save_reg r15, 10*16 + 3*8
|
||||||
|
save_reg rdi, 10*16 + 4*8
|
||||||
|
save_reg rsi, 10*16 + 5*8
|
||||||
|
end_prolog
|
||||||
|
mov arg4, arg(4)
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
;;; Epilogue: reload every register spilled by FUNC_SAVE from the same frame
;;; offsets, then release the frame.
%macro FUNC_RESTORE 0
|
||||||
|
vmovdqa xmm6, [rsp + 0*16]
|
||||||
|
vmovdqa xmm7, [rsp + 1*16]
|
||||||
|
vmovdqa xmm8, [rsp + 2*16]
|
||||||
|
vmovdqa xmm9, [rsp + 3*16]
|
||||||
|
vmovdqa xmm10, [rsp + 4*16]
|
||||||
|
vmovdqa xmm11, [rsp + 5*16]
|
||||||
|
vmovdqa xmm12, [rsp + 6*16]
|
||||||
|
vmovdqa xmm13, [rsp + 7*16]
|
||||||
|
vmovdqa xmm14, [rsp + 8*16]
|
||||||
|
vmovdqa xmm15, [rsp + 9*16]
|
||||||
|
mov r12, [rsp + 10*16 + 0*8]
|
||||||
|
mov r13, [rsp + 10*16 + 1*8]
|
||||||
|
mov r14, [rsp + 10*16 + 2*8]
|
||||||
|
mov r15, [rsp + 10*16 + 3*8]
|
||||||
|
mov rdi, [rsp + 10*16 + 4*8]
|
||||||
|
mov rsi, [rsp + 10*16 + 5*8]
|
||||||
|
add rsp, stack_size
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
;;; Symbolic names for the registers used by gf_5vect_dot_prod_avx.
;;;   len       - byte count on entry; the function subtracts 16 from it
;;;   vec       - number of sources on entry; later scaled by PS
;;;   mul_array - base address of the lookup tables
;;;   src       - array of source buffer pointers
;;;   dest      - array of destination pointers (kept for the whole function)
;;;   ptr       - pointer to the source buffer currently being read
;;;   vec_i     - byte offset into the src pointer array
;;;   dest1, 2  - destinations 1 and 2
;;;   vskip1    - byte offset between consecutive table sets
;;;   vskip3    - byte offset from the first table set to the fourth
;;;   pos       - byte offset into the buffers; shares rax with the return value
%define len arg0
|
||||||
|
%define vec arg1
|
||||||
|
%define mul_array arg2
|
||||||
|
%define src arg3
|
||||||
|
%define dest arg4
|
||||||
|
%define ptr arg5
|
||||||
|
%define vec_i tmp2
|
||||||
|
%define dest1 tmp3
|
||||||
|
%define dest2 tmp4
|
||||||
|
%define vskip1 tmp5
|
||||||
|
%define vskip3 tmp6
|
||||||
|
%define pos return
|
||||||
|
|
||||||
|
|
||||||
|
;;; Select the instructions used for source loads (XLDR) and result stores (XSTR):
;;;   EC_ALIGNED_ADDR not defined             -> vmovdqu   / vmovdqu
;;;   EC_ALIGNED_ADDR and NO_NT_LDST defined  -> vmovdqa   / vmovdqa
;;;   EC_ALIGNED_ADDR defined, NO_NT_LDST not -> vmovntdqa / vmovntdq
%ifndef EC_ALIGNED_ADDR
|
||||||
|
;;; Use unaligned load/store
|
||||||
|
%define XLDR vmovdqu
|
||||||
|
%define XSTR vmovdqu
|
||||||
|
%else
|
||||||
|
;;; Use aligned load/store; non-temporal unless NO_NT_LDST is defined
|
||||||
|
%ifdef NO_NT_LDST
|
||||||
|
%define XLDR vmovdqa
|
||||||
|
%define XSTR vmovdqa
|
||||||
|
%else
|
||||||
|
%define XLDR vmovntdqa
|
||||||
|
%define XSTR vmovntdq
|
||||||
|
%endif
|
||||||
|
%endif
|
||||||
|
|
||||||
|
default rel
|
||||||
|
|
||||||
|
[bits 64]
|
||||||
|
section .text
|
||||||
|
|
||||||
|
;;; xmm register roles in gf_5vect_dot_prod_avx.
;;;   xmask0f      - the mask0f constant (0x0f in every byte)
;;;   xgftN_lo/_hi - the two 16-byte halves of the table for output N and the
;;;                  current source; overwritten by the vpshufb lookups.
;;;                  There is no xgft5 pair: the function reloads xgft1_lo/_hi
;;;                  with the fifth table once output 1 has been accumulated.
%define xmask0f xmm15
|
||||||
|
%define xgft1_lo xmm14
|
||||||
|
%define xgft1_hi xmm13
|
||||||
|
%define xgft2_lo xmm12
|
||||||
|
%define xgft2_hi xmm11
|
||||||
|
%define xgft3_lo xmm10
|
||||||
|
%define xgft3_hi xmm9
|
||||||
|
%define xgft4_lo xmm8
|
||||||
|
%define xgft4_hi xmm7
|
||||||
|
|
||||||
|
|
||||||
|
;;;   x0    - 16 source bytes, then their high nibbles
;;;   xtmpa - low nibbles of the same 16 source bytes
;;;   xp1-5 - running XOR accumulators for outputs 1-5
%define x0 xmm0
|
||||||
|
%define xtmpa xmm1
|
||||||
|
%define xp1 xmm2
|
||||||
|
%define xp2 xmm3
|
||||||
|
%define xp3 xmm4
|
||||||
|
%define xp4 xmm5
|
||||||
|
%define xp5 xmm6
|
||||||
|
|
||||||
|
;;; gf_5vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; Processes the inputs 16 bytes at a time. For each 16-byte position every
;;; source is split into low and high nibbles, each nibble vector indexes a
;;; 16-byte table half with vpshufb, and the results are XORed into five
;;; accumulators (one per output) which are then stored to the five
;;; destination buffers.
;;;
;;;   len       - number of bytes to process; values below 16 are rejected
;;;   vec       - number of sources. The inner loop body runs once before the
;;;               count is tested, so vec >= 1 is assumed - confirm with callers.
;;;   mul_array - tables, 32 bytes per source; the set for output k (0-based)
;;;               starts at byte offset k*32*vec
;;;   src       - array of vec pointers to the source buffers
;;;   dest      - array of five destination pointers
;;;
;;; Returns 0 in rax on success, or 1 if len < 16 (nothing is stored).
;;; If len is not a multiple of 16, one extra pass is made at offset len-16,
;;; recomputing bytes already written by the previous pass.
;;; NOTE(review): that extra pass uses an offset that is 16-byte aligned only
;;; when len is a multiple of 16; with EC_ALIGNED_ADDR defined XLDR/XSTR are
;;; aligned-only instructions - confirm callers pass a multiple of 16 then.
align 16
|
||||||
|
global gf_5vect_dot_prod_avx:function
|
||||||
|
func(gf_5vect_dot_prod_avx)
|
||||||
|
FUNC_SAVE
|
||||||
|
sub len, 16 ;len = offset of the last full 16-byte block
|
||||||
|
jl .return_fail ;Fewer than 16 bytes requested
|
||||||
|
xor pos, pos
|
||||||
|
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||||
|
mov vskip1, vec
|
||||||
|
imul vskip1, 32 ;vskip1 = 32*vec, size of one table set
|
||||||
|
mov vskip3, vec
|
||||||
|
imul vskip3, 96 ;vskip3 = 3*32*vec, offset of the fourth table set
|
||||||
|
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||||
|
mov dest1, [dest]
|
||||||
|
mov dest2, [dest+PS]
|
||||||
|
|
||||||
|
|
||||||
|
;;; Outer loop: one iteration per 16-byte position pos.
.loop16:
|
||||||
|
mov tmp, mul_array ;tmp walks the tables, 32 bytes per source
|
||||||
|
xor vec_i, vec_i
|
||||||
|
vpxor xp1, xp1 ;Clear the five accumulators
|
||||||
|
vpxor xp2, xp2
|
||||||
|
vpxor xp3, xp3
|
||||||
|
vpxor xp4, xp4
|
||||||
|
vpxor xp5, xp5
|
||||||
|
|
||||||
|
|
||||||
|
;;; Inner loop: one iteration per source buffer.
.next_vect:
|
||||||
|
mov ptr, [src+vec_i]
|
||||||
|
add vec_i, PS
|
||||||
|
XLDR x0, [ptr+pos] ;Get next source vector
|
||||||
|
|
||||||
|
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||||
|
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||||
|
vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||||
|
vmovdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||||
|
vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||||
|
vmovdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||||
|
vmovdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||||
|
vmovdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||||
|
|
||||||
|
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0
|
||||||
|
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0
|
||||||
|
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||||
|
|
||||||
|
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||||
|
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||||
|
|
||||||
|
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||||
|
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||||
|
|
||||||
|
;; xgft1_lo/_hi are free again (output 1 is already accumulated), so they
;; are reused for the fifth table. This must happen before tmp is advanced.
vmovdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||||
|
vmovdqu xgft1_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||||
|
add tmp, 32 ;Advance to the next source's table
|
||||||
|
|
||||||
|
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||||
|
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||||
|
|
||||||
|
vpshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||||
|
vpxor xp4, xgft4_hi ;xp4 += partial
|
||||||
|
|
||||||
|
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||||
|
vpxor xp5, xgft1_hi ;xp5 += partial
|
||||||
|
|
||||||
|
cmp vec_i, vec
|
||||||
|
jl .next_vect
|
||||||
|
|
||||||
|
;; Destinations 3-5 have no dedicated registers: they are re-read from the
;; dest array on every pass into tmp, ptr and vec_i, which the inner loop
;; no longer needs and which .loop16 re-initialises.
mov tmp, [dest+2*PS]
|
||||||
|
mov ptr, [dest+3*PS]
|
||||||
|
mov vec_i, [dest+4*PS]
|
||||||
|
|
||||||
|
XSTR [dest1+pos], xp1
|
||||||
|
XSTR [dest2+pos], xp2
|
||||||
|
XSTR [tmp+pos], xp3
|
||||||
|
XSTR [ptr+pos], xp4
|
||||||
|
XSTR [vec_i+pos], xp5
|
||||||
|
|
||||||
|
add pos, 16 ;Loop on 16 bytes at a time
|
||||||
|
cmp pos, len
|
||||||
|
jle .loop16
|
||||||
|
|
||||||
|
lea tmp, [len + 16] ;tmp = the original byte count
|
||||||
|
cmp pos, tmp
|
||||||
|
je .return_pass ;Every byte has been written
|
||||||
|
|
||||||
|
;; Tail len
|
||||||
|
mov pos, len ;Overlapped offset length-16
|
||||||
|
jmp .loop16 ;Do one more overlap pass
|
||||||
|
|
||||||
|
.return_pass:
|
||||||
|
FUNC_RESTORE
|
||||||
|
mov return, 0
|
||||||
|
ret
|
||||||
|
|
||||||
|
.return_fail:
|
||||||
|
FUNC_RESTORE
|
||||||
|
mov return, 1
|
||||||
|
ret
|
||||||
|
|
||||||
|
;;; NOTE(review): this line is outside the win64 %ifidn block and nothing in
;;; this file defines endproc_frame for ELF64 - confirm how ELF builds treat it.
endproc_frame
|
||||||
|
|
||||||
|
section .data
|
||||||
|
|
||||||
|
;;; 16 bytes of 0x0f, used to isolate one nibble of every byte. The function
;;; loads it with vmovdqa, so the 16-byte alignment must be kept.
align 16
|
||||||
|
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||||
|
|
||||||
|
;;; slversion func, core, ver, snum
;;; Exports two global labels at the same address, <func>_slver and
;;; <func>_slver_<core><ver><snum>, and emits four bytes of data there:
;;; the word 0x<snum> followed by the bytes 0x<ver> and 0x<core>.
;;; The three numeric arguments are pasted after "0x", so they are read as hex.
%macro slversion 4
|
||||||
|
global %1_slver_%2%3%4
|
||||||
|
global %1_slver
|
||||||
|
%1_slver:
|
||||||
|
%1_slver_%2%3%4:
|
||||||
|
dw 0x%4
|
||||||
|
db 0x%3, 0x%2
|
||||||
|
%endmacro
|
||||||
|
;;; func core, ver, snum
|
||||||
|
slversion gf_5vect_dot_prod_avx, 02, 03, 0194
|
323
erasure/src/gf-5vect-dot-prod-avx2.asm
Normal file
323
erasure/src/gf-5vect-dot-prod-avx2.asm
Normal file
@ -0,0 +1,323 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_5vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
;;; elf64 build: map the abstract argument/scratch names used by the routine
;;; onto registers and define the matching prologue/epilogue macros.
%ifidn __OUTPUT_FORMAT__, elf64
 %define arg0  rdi
 %define arg1  rsi
 %define arg2  rdx
 %define arg3  rcx
 %define arg4  r8
 %define arg5  r9

 %define tmp   r11
 %define tmp.w r11d
 %define tmp.b r11b
 %define tmp2  r10
 %define tmp3  r13		; must be saved and restored
 %define tmp4  r12		; must be saved and restored
 %define tmp5  r14		; must be saved and restored
 %define tmp6  r15		; must be saved and restored
 %define return rax
 %define PS     8		; stride between entries of a pointer array
 %define LOG_PS 3		; log2(PS), used to scale a count by PS with a shift

 ;; On elf64 a function entry is just a label.
 %define func(x) x:

 ;; Preserve the registers aliased by tmp3..tmp6.
 %macro FUNC_SAVE 0
	push	r12
	push	r13
	push	r14
	push	r15
 %endmacro

 ;; Undo FUNC_SAVE; pops are in the reverse order of the pushes.
 %macro FUNC_RESTORE 0
	pop	r15
	pop	r14
	pop	r13
	pop	r12
 %endmacro
%endif
|
||||||
|
|
||||||
|
;;; win64 build: map the abstract argument/scratch names onto registers and
;;; define the prologue/epilogue macros.  Only four arguments arrive in
;;; registers here; arg4 is loaded from the caller's stack by FUNC_SAVE.
;;; NOTE(review): proc_frame, alloc_stack, save_reg and end_prolog are not
;;; defined in this file -- presumably the assembler's win64 unwind
;;; directives; confirm against the build's assembler.
%ifidn __OUTPUT_FORMAT__, win64
 %define arg0   rcx
 %define arg1   rdx
 %define arg2   r8
 %define arg3   r9

 %define arg4   r12 		; must be saved, loaded and restored
 %define arg5   r15 		; must be saved and restored
 %define tmp    r11
 %define tmp.w  r11d
 %define tmp.b  r11b
 %define tmp2   r10
 %define tmp3   r13		; must be saved and restored
 %define tmp4   r14		; must be saved and restored
 %define tmp5   rdi		; must be saved and restored
 %define tmp6   rsi		; must be saved and restored
 %define return rax
 %define PS     8		; stride between entries of a pointer array
 %define LOG_PS 3		; log2(PS), used to scale a count by PS with a shift
 %define stack_size  10*16 + 7*8		; must be an odd multiple of 8
 ;; Address of stack argument x as seen after alloc_stack has run.
 %define arg(x)      [rsp + stack_size + PS + PS*x]

 %define func(x) proc_frame x

 ;; Reserve the frame, spill xmm6-xmm15 and the integer registers this file
 ;; aliases (r12-r15, rdi, rsi), then fetch the fifth argument.
 %macro FUNC_SAVE 0
	alloc_stack	stack_size
	vmovdqa	[rsp + 0*16], xmm6
	vmovdqa	[rsp + 1*16], xmm7
	vmovdqa	[rsp + 2*16], xmm8
	vmovdqa	[rsp + 3*16], xmm9
	vmovdqa	[rsp + 4*16], xmm10
	vmovdqa	[rsp + 5*16], xmm11
	vmovdqa	[rsp + 6*16], xmm12
	vmovdqa	[rsp + 7*16], xmm13
	vmovdqa	[rsp + 8*16], xmm14
	vmovdqa	[rsp + 9*16], xmm15
	save_reg	r12,  10*16 + 0*8
	save_reg	r13,  10*16 + 1*8
	save_reg	r14,  10*16 + 2*8
	save_reg	r15,  10*16 + 3*8
	save_reg	rdi,  10*16 + 4*8
	save_reg	rsi,  10*16 + 5*8
	end_prolog
	mov	arg4, arg(4)
 %endmacro

 ;; Reload everything FUNC_SAVE spilled and release the frame.
 %macro FUNC_RESTORE 0
	vmovdqa	xmm6, [rsp + 0*16]
	vmovdqa	xmm7, [rsp + 1*16]
	vmovdqa	xmm8, [rsp + 2*16]
	vmovdqa	xmm9, [rsp + 3*16]
	vmovdqa	xmm10, [rsp + 4*16]
	vmovdqa	xmm11, [rsp + 5*16]
	vmovdqa	xmm12, [rsp + 6*16]
	vmovdqa	xmm13, [rsp + 7*16]
	vmovdqa	xmm14, [rsp + 8*16]
	vmovdqa	xmm15, [rsp + 9*16]
	mov	r12,  [rsp + 10*16 + 0*8]
	mov	r13,  [rsp + 10*16 + 1*8]
	mov	r14,  [rsp + 10*16 + 2*8]
	mov	r15,  [rsp + 10*16 + 3*8]
	mov	rdi,  [rsp + 10*16 + 4*8]
	mov	rsi,  [rsp + 10*16 + 5*8]
	add	rsp, stack_size
 %endmacro
%endif
|
||||||
|
|
||||||
|
;;; Readable names for the arguments and scratch registers.  The argument
;;; order follows the prototype
;;;   gf_5vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests)
%define len   arg0		; byte count
%define vec   arg1		; number of source vectors
%define mul_array arg2		; g_tbls: multiplication lookup tables
%define	src   arg3		; buffs: array of source pointers
%define dest  arg4		; dests: array of five destination pointers
%define ptr   arg5		; scratch pointer (arg5 is not a real argument)
%define vec_i tmp2		; byte offset into the src pointer array
%define dest1 tmp3		; cached dests[0]
%define dest2 tmp4		; cached dests[1]
%define vskip1 tmp5		; vec*32: distance between one output's tables and the next
%define vskip3 tmp6		; vec*96: three times vskip1
%define pos   return		; current byte offset; shares rax with the return value
|
||||||
|
|
||||||
|
|
||||||
|
;;; Select the load (XLDR) and store (XSTR) instructions used for the source
;;; and destination buffers.
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
 %define XLDR vmovdqu
 %define XSTR vmovdqu
%else
;;; EC_ALIGNED_ADDR is defined: use aligned load/store.
;;; Non-temporal forms are used unless NO_NT_LDST is also defined.
 %ifdef NO_NT_LDST
  %define XLDR vmovdqa
  %define XSTR vmovdqa
 %else
  %define XLDR vmovntdqa
  %define XSTR vmovntdq
 %endif
%endif
|
||||||
|
|
||||||
|
default rel

[bits 64]
section .text

;;; Vector register allocation.
;;; xgftN_lo receives a 32-byte table load; xgftN_hi is the same data with
;;; its two 128-bit halves swapped.  There are only four lo/hi pairs: the
;;; fifth output reuses the xgft1 pair once the first output has been
;;; accumulated.
%define xmask0f   ymm15		; 0x0f in every byte
%define xmask0fx  xmm15		; low 128 bits of xmask0f
%define xgft1_lo  ymm14
%define xgft1_hi  ymm13
%define xgft2_lo  ymm12
%define xgft2_hi  ymm11
%define xgft3_lo  ymm10
%define xgft3_hi  ymm9
%define xgft4_lo  ymm8
%define xgft4_hi  ymm7


%define x0     ymm0		; current 32 source bytes
%define xtmpa  ymm1		; nibble indices derived from x0
%define xp1    ymm2		; accumulator for output 1
%define xp2    ymm3		; accumulator for output 2
%define xp3    ymm4		; accumulator for output 3
%define xp4    ymm5		; accumulator for output 4
%define xp5    ymm6		; accumulator for output 5
|
||||||
|
|
||||||
|
;;;
;;; gf_5vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; Computes five GF dot products at once over `vec` source buffers,
;;; 32 bytes per iteration.
;;;
;;;   len     byte count of each buffer; must be at least 32
;;;   vec     number of source buffers
;;;   g_tbls  lookup tables, 32 bytes per (output, source) pair; the tables
;;;           of consecutive outputs are vec*32 bytes apart
;;;   buffs   array of `vec` source pointers
;;;   dests   array of five destination pointers
;;;
;;; Returns 0 on success, 1 if len is less than 32.
;;;
;;; When len is not a multiple of 32 the final pass is re-run at offset
;;; len-32, so it overlaps bytes that were already written.
;;;
align 16
global gf_5vect_dot_prod_avx2:function
func(gf_5vect_dot_prod_avx2)
	FUNC_SAVE
	sub	len, 32			;len now holds the last valid 32-byte offset
	jl	.return_fail
	xor	pos, pos
	mov	tmp.b, 0x0f
	vpinsrb	xmask0fx, xmask0fx, tmp.w, 0
	vpbroadcastb xmask0f, xmask0fx	;Construct mask 0x0f0f0f...
	mov	vskip1, vec
	imul	vskip1, 32
	mov	vskip3, vec
	imul	vskip3, 96
	sal	vec, LOG_PS		;vec *= PS. Make vec_i count by PS
	mov	dest1, [dest]
	mov	dest2, [dest+PS]


.loop32:
	mov	tmp, mul_array
	xor	vec_i, vec_i
	vpxor	xp1, xp1
	vpxor	xp2, xp2
	vpxor	xp3, xp3
	vpxor	xp4, xp4
	vpxor	xp5, xp5


.next_vect:
	mov	ptr, [src+vec_i]
	XLDR	x0, [ptr+pos]		;Get next source vector
	add	vec_i, PS

	vpand	xgft4_lo, x0, xmask0f		;Mask low src nibble in bits 4-0
	vpsraw	x0, x0, 4			;Shift to put high nibble into bits 4-0
	vpand	x0, x0, xmask0f			;Mask high src nibble in bits 4-0
	vperm2i128 xtmpa, xgft4_lo, x0, 0x30	;swap xtmpa from 1lo|2lo to 1lo|2hi
	vperm2i128 x0, xgft4_lo, x0, 0x12	;swap x0 from    1hi|2hi to 1hi|2lo

	vmovdqu	xgft1_lo, [tmp]			;Load array Ax{00}, Ax{01}, ..., Ax{0f}
						;     "     Ax{00}, Ax{10}, ..., Ax{f0}
	vmovdqu	xgft2_lo, [tmp+vskip1*1]	;Load array Bx{00}, Bx{01}, ..., Bx{0f}
						;     "     Bx{00}, Bx{10}, ..., Bx{f0}
	vmovdqu	xgft3_lo, [tmp+vskip1*2]	;Load array Cx{00}, Cx{01}, ..., Cx{0f}
						;     "     Cx{00}, Cx{10}, ..., Cx{f0}
	vmovdqu	xgft4_lo, [tmp+vskip3]		;Load array Dx{00}, Dx{01}, ..., Dx{0f}
						;     "     Dx{00}, Dx{10}, ..., Dx{f0}

	vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
	vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
	vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
	vperm2i128 xgft4_hi, xgft4_lo, xgft4_lo, 0x01 ; swapped to hi | lo

	vpshufb	xgft1_hi, x0		;Lookup mul table of high nibble
	vpshufb	xgft1_lo, xtmpa		;Lookup mul table of low nibble
	vpxor	xgft1_hi, xgft1_lo	;GF add high and low partials
	vpxor	xp1, xgft1_hi		;xp1 += partial

	vpshufb	xgft2_hi, x0		;Lookup mul table of high nibble
	vpshufb	xgft2_lo, xtmpa		;Lookup mul table of low nibble
	vpxor	xgft2_hi, xgft2_lo	;GF add high and low partials
	vpxor	xp2, xgft2_hi		;xp2 += partial

	;; The xgft1 pair is free again: reuse it for the fifth table.
	vmovdqu	xgft1_lo, [tmp+vskip1*4]	;Load array Ex{00}, Ex{01}, ..., Ex{0f}
						;     "     Ex{00}, Ex{10}, ..., Ex{f0}
	vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
	add	tmp, 32			;Advance to the next source's tables

	vpshufb	xgft3_hi, x0		;Lookup mul table of high nibble
	vpshufb	xgft3_lo, xtmpa		;Lookup mul table of low nibble
	vpxor	xgft3_hi, xgft3_lo	;GF add high and low partials
	vpxor	xp3, xgft3_hi		;xp3 += partial

	vpshufb	xgft4_hi, x0		;Lookup mul table of high nibble
	vpshufb	xgft4_lo, xtmpa		;Lookup mul table of low nibble
	vpxor	xgft4_hi, xgft4_lo	;GF add high and low partials
	vpxor	xp4, xgft4_hi		;xp4 += partial

	vpshufb	xgft1_hi, x0		;Lookup mul table of high nibble
	vpshufb	xgft1_lo, xtmpa		;Lookup mul table of low nibble
	vpxor	xgft1_hi, xgft1_lo	;GF add high and low partials
	vpxor	xp5, xgft1_hi		;xp5 += partial

	cmp	vec_i, vec
	jl	.next_vect

	;; tmp, ptr and vec_i are dead here (all three are reinitialised at
	;; .loop32 / .next_vect), so borrow them for dests[2..4].
	mov	tmp, [dest+2*PS]
	mov	ptr, [dest+3*PS]
	mov	vec_i, [dest+4*PS]

	XSTR	[dest1+pos], xp1
	XSTR	[dest2+pos], xp2
	XSTR	[tmp+pos], xp3
	XSTR	[ptr+pos], xp4
	XSTR	[vec_i+pos], xp5

	add	pos, 32			;Loop on 32 bytes at a time
	cmp	pos, len
	jle	.loop32

	lea	tmp, [len + 32]		;tmp = original length
	cmp	pos, tmp
	je	.return_pass		;Length was a multiple of 32: done

	;; Tail len
	mov	pos, len	;Overlapped offset length-32
	jmp	.loop32		;Do one more overlap pass

.return_pass:
	FUNC_RESTORE
	mov	return, 0
	ret

.return_fail:
	FUNC_RESTORE
	mov	return, 1
	ret

endproc_frame
|
||||||
|
|
||||||
|
section .data

;; slversion <func>, <core>, <ver>, <snum>
;; Exports the labels <func>_slver and <func>_slver_<core><ver><snum>, both
;; pointing at the same 4 bytes of version data: a 16-bit word holding <snum>
;; followed by the bytes <ver> and <core> (each argument is read as hex digits).
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
	dw 0x%4
	db 0x%3, 0x%2
%endmacro
;;;       func                    core, ver, snum
slversion gf_5vect_dot_prod_avx2, 04, 03, 0199
|
319
erasure/src/gf-5vect-dot-prod-sse-perf.c
Normal file
319
erasure/src/gf-5vect-dot-prod-sse-perf.c
Normal file
@ -0,0 +1,319 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/tests.h"
|
||||||
|
|
||||||
|
/* Routine being benchmarked; override with -DFUNCTION_UNDER_TEST=... */
#ifndef FUNCTION_UNDER_TEST
# define FUNCTION_UNDER_TEST gf_5vect_dot_prod_sse
#endif

/* xstr(X) expands X first and then stringifies the result. */
#define str(s) #s
#define xstr(s) str(s)

/*
 * Benchmark sizing.  Three modes:
 *   CACHED_TEST  - small data set, many iterations ("_warm")
 *   default      - data set larger than the last-level cache ("_cold")
 *   TEST_CUSTOM  - TEST_SOURCES and TEST_LEN must be supplied by the
 *                  build; TEST_LOOPS defaults to 1000 ("_cus")
 * Expression-valued macros are parenthesized so they expand safely inside
 * larger expressions.
 */
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES 10
# define TEST_LEN     (8*1024)
# define TEST_LOOPS   40000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES 10
#  define GT_L3_CACHE  (32*1024*1024)	/* some number > last level cache */
#  define TEST_LEN     ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
#  define TEST_LOOPS   100
#  define TEST_TYPE_STR "_cold"
# else
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#    define TEST_LOOPS 1000
#  endif
# endif
#endif

typedef unsigned char u8;
|
||||||
|
|
||||||
|
/*
 * Print the first len bytes of buf to stdout in hex.
 *
 * Each byte is written as " %2x".  A newline follows every 32nd byte and
 * one more newline is always written at the end.
 */
void dump(unsigned char *buf, int len)
{
	int i;

	for (i = 0; i < len; i++) {
		printf(" %2x", 0xff & buf[i]);
		if ((i + 1) % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print a k x m byte matrix to stdout in hex.
 *
 * s is an array of k row pointers; the first m bytes of each row are
 * written as " %2x", one row per line, followed by a blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		for (col = 0; col < m; col++)
			printf(" %2x", s[row][col]);
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
void *buf;
|
||||||
|
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
|
||||||
|
u8 g4[TEST_SOURCES], g5[TEST_SOURCES], *g_tbls, *buffs[TEST_SOURCES];
|
||||||
|
u8 *dest1, *dest2, *dest3, *dest4, *dest5, *dest_ref1, *dest_ref2;
|
||||||
|
u8 *dest_ref3, *dest_ref4, *dest_ref5, *dest_ptrs[5];
|
||||||
|
struct perf start, stop;
|
||||||
|
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 16, 6 * TEST_SOURCES * 32)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
g_tbls = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest2 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest3 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest4 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest5 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref2 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref3 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref4 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref5 = buf;
|
||||||
|
|
||||||
|
dest_ptrs[0] = dest1;
|
||||||
|
dest_ptrs[1] = dest2;
|
||||||
|
dest_ptrs[2] = dest3;
|
||||||
|
dest_ptrs[3] = dest4;
|
||||||
|
dest_ptrs[4] = dest5;
|
||||||
|
|
||||||
|
// Performance test
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
memset(dest1, 0, TEST_LEN);
|
||||||
|
memset(dest2, 0, TEST_LEN);
|
||||||
|
memset(dest3, 0, TEST_LEN);
|
||||||
|
memset(dest4, 0, TEST_LEN);
|
||||||
|
memset(dest5, 0, TEST_LEN);
|
||||||
|
memset(dest_ref1, 0, TEST_LEN);
|
||||||
|
memset(dest_ref2, 0, TEST_LEN);
|
||||||
|
memset(dest_ref3, 0, TEST_LEN);
|
||||||
|
memset(dest_ref4, 0, TEST_LEN);
|
||||||
|
memset(dest_ref5, 0, TEST_LEN);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
g4[i] = rand();
|
||||||
|
g5[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++) {
|
||||||
|
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||||
|
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref4);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref5);
|
||||||
|
|
||||||
|
#ifdef DO_REF_PERF
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS / 20; i++) {
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++) {
|
||||||
|
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||||
|
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref4);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref5);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf("gf_5vect_dot_prod_base" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 5) * i);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS; i++) {
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++) {
|
||||||
|
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||||
|
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 5) * i);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test3\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test4\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest4, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test5\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref5, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest5, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("pass perf check\n");
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
}
|
805
erasure/src/gf-5vect-dot-prod-sse-test.c
Normal file
805
erasure/src/gf-5vect-dot-prod-sse-test.c
Normal file
@ -0,0 +1,805 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/types.h"
|
||||||
|
|
||||||
|
/* Routine being tested; override with -DFUNCTION_UNDER_TEST=... */
#ifndef FUNCTION_UNDER_TEST
# define FUNCTION_UNDER_TEST gf_5vect_dot_prod_sse
#endif
#ifndef TEST_MIN_SIZE
# define TEST_MIN_SIZE 16
#endif

/* xstr(X) expands X first and then stringifies the result. */
#define str(s) #s
#define xstr(s) str(s)

#define TEST_LEN 8192
#define TEST_SIZE (TEST_LEN/2)
#define TEST_MEM  TEST_SIZE
#define TEST_LOOPS 20000
#define TEST_TYPE_STR ""

#ifndef TEST_SOURCES
# define TEST_SOURCES  16
#endif
#ifndef RANDOMS
# define RANDOMS 20
#endif

#ifdef EC_ALIGNED_ADDR
// Define power of 2 range to check ptr, len alignment
# define PTR_ALIGN_CHK_B 0
# define LEN_ALIGN_CHK_B 0	// 0 for aligned only
#else
// Define power of 2 range to check ptr, len alignment
# define PTR_ALIGN_CHK_B 32
# define LEN_ALIGN_CHK_B 32	// 0 for aligned only
#endif

typedef unsigned char u8;
|
||||||
|
|
||||||
|
/*
 * Print the first len bytes of buf to stdout in hex.
 *
 * Each byte is written as " %2x".  A newline follows every 32nd byte and
 * one more newline is always written at the end.
 */
void dump(unsigned char *buf, int len)
{
	int i;

	for (i = 0; i < len; i++) {
		printf(" %2x", 0xff & buf[i]);
		if ((i + 1) % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print a k x m byte matrix to stdout in hex.
 *
 * s is an array of k row pointers; the first m bytes of each row are
 * written as " %2x", one row per line, followed by a blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		for (col = 0; col < m; col++)
			printf(" %2x", s[row][col]);
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print a k x m byte matrix stored contiguously in row-major order.
 *
 * Element (i, j) is read from s[j + i * m].  Each byte is written as
 * " %2x", one row per line, followed by a blank line.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		const unsigned char *line = s + (row * m);

		for (col = 0; col < m; col++)
			printf(" %2x", 0xff & line[col]);
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j, rtest, srcs;
|
||||||
|
void *buf;
|
||||||
|
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
|
||||||
|
u8 g4[TEST_SOURCES], g5[TEST_SOURCES], *g_tbls;
|
||||||
|
u8 *dest1, *dest2, *dest3, *dest4, *dest5, *buffs[TEST_SOURCES];
|
||||||
|
u8 *dest_ref1, *dest_ref2, *dest_ref3, *dest_ref4, *dest_ref5;
|
||||||
|
u8 *dest_ptrs[5];
|
||||||
|
|
||||||
|
int align, size;
|
||||||
|
unsigned char *efence_buffs[TEST_SOURCES];
|
||||||
|
unsigned int offset;
|
||||||
|
u8 *ubuffs[TEST_SOURCES];
|
||||||
|
u8 *udest_ptrs[5];
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 16, 2 * (6 * TEST_SOURCES * 32))) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
g_tbls = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest2 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest3 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest4 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest5 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref2 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref3 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref4 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref5 = buf;
|
||||||
|
|
||||||
|
dest_ptrs[0] = dest1;
|
||||||
|
dest_ptrs[1] = dest2;
|
||||||
|
dest_ptrs[2] = dest3;
|
||||||
|
dest_ptrs[3] = dest4;
|
||||||
|
dest_ptrs[4] = dest5;
|
||||||
|
|
||||||
|
// Test of all zeros
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
memset(buffs[i], 0, TEST_LEN);
|
||||||
|
|
||||||
|
memset(dest1, 0, TEST_LEN);
|
||||||
|
memset(dest2, 0, TEST_LEN);
|
||||||
|
memset(dest3, 0, TEST_LEN);
|
||||||
|
memset(dest4, 0, TEST_LEN);
|
||||||
|
memset(dest5, 0, TEST_LEN);
|
||||||
|
memset(dest_ref1, 0, TEST_LEN);
|
||||||
|
memset(dest_ref2, 0, TEST_LEN);
|
||||||
|
memset(dest_ref3, 0, TEST_LEN);
|
||||||
|
memset(dest_ref4, 0, TEST_LEN);
|
||||||
|
memset(dest_ref5, 0, TEST_LEN);
|
||||||
|
memset(g1, 2, TEST_SOURCES);
|
||||||
|
memset(g2, 1, TEST_SOURCES);
|
||||||
|
memset(g3, 7, TEST_SOURCES);
|
||||||
|
memset(g4, 9, TEST_SOURCES);
|
||||||
|
memset(g5, 4, TEST_SOURCES);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]);
|
||||||
|
gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]);
|
||||||
|
gf_vect_mul_init(g5[i], &g_tbls[128 * TEST_SOURCES + i * 32]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref4);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref5);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest4, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test5\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref5, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest5, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
putchar('.');
|
||||||
|
|
||||||
|
// Rand data test
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
g4[i] = rand();
|
||||||
|
g5[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref4);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref5);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest4, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref5, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest5, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rand data test with varied parameters
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
g4[i] = rand();
|
||||||
|
g5[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs,
|
||||||
|
dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs,
|
||||||
|
dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs,
|
||||||
|
dest_ref4);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[128 * srcs], buffs,
|
||||||
|
dest_ref5);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test1 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test2 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test3 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test4 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest4, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test5 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref5, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest5, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run tests at end of buffer for Electric Fence
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
|
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||||
|
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
g4[i] = rand();
|
||||||
|
g5[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||||
|
efence_buffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||||
|
efence_buffs, dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
|
||||||
|
efence_buffs, dest_ref4);
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES],
|
||||||
|
efence_buffs, dest_ref5);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref4, dest4, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest4, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref5, dest5, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref5, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest5, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test rand ptr alignment if available
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||||
|
srcs = rand() % TEST_SOURCES;
|
||||||
|
if (srcs == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||||
|
// Add random offsets
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
|
||||||
|
udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
udest_ptrs[4] = dest5 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
|
||||||
|
memset(dest1, 0, TEST_LEN); // zero pad to check write-over
|
||||||
|
memset(dest2, 0, TEST_LEN);
|
||||||
|
memset(dest3, 0, TEST_LEN);
|
||||||
|
memset(dest4, 0, TEST_LEN);
|
||||||
|
memset(dest5, 0, TEST_LEN);
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
ubuffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
g4[i] = rand();
|
||||||
|
g5[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], ubuffs, dest_ref5);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs);
|
||||||
|
|
||||||
|
if (memcmp(dest_ref1, udest_ptrs[0], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[0], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref2, udest_ptrs[1], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[1], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref3, udest_ptrs[2], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[2], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref4, udest_ptrs[3], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[3], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref5, udest_ptrs[4], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref5, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[4], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Confirm that padding around dests is unchanged
|
||||||
|
memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||||
|
offset = udest_ptrs[0] - dest1;
|
||||||
|
|
||||||
|
if (memcmp(dest1, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad1 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad1 end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = udest_ptrs[1] - dest2;
|
||||||
|
if (memcmp(dest2, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad2 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad2 end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = udest_ptrs[2] - dest3;
|
||||||
|
if (memcmp(dest3, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad3 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad3 end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = udest_ptrs[3] - dest4;
|
||||||
|
if (memcmp(dest4, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad4 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad4 end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = udest_ptrs[4] - dest5;
|
||||||
|
if (memcmp(dest5, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad5 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest5 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad5 end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test all size alignment
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
|
|
||||||
|
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||||
|
srcs = TEST_SOURCES;
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
g4[i] = rand();
|
||||||
|
g5[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], buffs, dest_ref5);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (memcmp(dest_ref1, dest_ptrs[0], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[0], 25);
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref2, dest_ptrs[1], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[1], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref3, dest_ptrs[2], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[2], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref4, dest_ptrs[3], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[3], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref5, dest_ptrs[4], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref5, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[4], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Pass\n");
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
}
|
312
erasure/src/gf-5vect-dot-prod-sse.asm
Normal file
312
erasure/src/gf-5vect-dot-prod-sse.asm
Normal file
@ -0,0 +1,312 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_5vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
;;; elf64 build: bind the generic argN/tmpN/return names used by the routine
;;; to concrete registers, and define the prologue/epilogue macros.
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define tmp5 r14 ; must be saved and restored
|
||||||
|
%define tmp6 r15 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
;;; PS is the size in bytes of one pointer-array entry; LOG_PS is log2(PS),
;;; used as a shift count to scale an element count into a byte offset.
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
;;; On elf64 a function entry is just a plain label.
%define func(x) x:
|
||||||
|
;;; Prologue: push r12-r15, the registers backing tmp3..tmp6.
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
%endmacro
|
||||||
|
;;; Epilogue: pop the same registers in the reverse order of FUNC_SAVE.
%macro FUNC_RESTORE 0
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
;;; win64 build: bind the generic argN/tmpN/return names to registers, describe
;;; the stack frame, and define the prologue/epilogue macros.
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
%define arg0 rcx
|
||||||
|
%define arg1 rdx
|
||||||
|
%define arg2 r8
|
||||||
|
%define arg3 r9
|
||||||
|
|
||||||
|
;;; arg4 does not arrive in a register here: FUNC_SAVE loads it from the
;;; stack into r12 (see "mov arg4, arg(4)" below).
%define arg4 r12 ; must be saved, loaded and restored
|
||||||
|
%define arg5 r15 ; must be saved and restored
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r14 ; must be saved and restored
|
||||||
|
%define tmp5 rdi ; must be saved and restored
|
||||||
|
%define tmp6 rsi ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
;;; PS is the size in bytes of one pointer-array entry; LOG_PS is log2(PS).
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
;;; Frame layout: ten 16-byte slots for xmm6-xmm15, followed by 8-byte slots.
;;; FUNC_SAVE fills six of the seven 8-byte slots (r12, r13, r14, r15, rdi, rsi).
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
|
||||||
|
;;; arg(x): stack location of argument x once the frame has been allocated.
;;; NOTE(review): the extra PS presumably skips the return address - confirm
;;; against the Win64 calling convention.
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||||
|
|
||||||
|
%define func(x) proc_frame x
|
||||||
|
;;; Prologue: allocate the frame, save xmm6-xmm15 and the general-purpose
;;; registers listed above, then fetch the fifth argument from the stack.
%macro FUNC_SAVE 0
|
||||||
|
alloc_stack stack_size
|
||||||
|
save_xmm128 xmm6, 0*16
|
||||||
|
save_xmm128 xmm7, 1*16
|
||||||
|
save_xmm128 xmm8, 2*16
|
||||||
|
save_xmm128 xmm9, 3*16
|
||||||
|
save_xmm128 xmm10, 4*16
|
||||||
|
save_xmm128 xmm11, 5*16
|
||||||
|
save_xmm128 xmm12, 6*16
|
||||||
|
save_xmm128 xmm13, 7*16
|
||||||
|
save_xmm128 xmm14, 8*16
|
||||||
|
save_xmm128 xmm15, 9*16
|
||||||
|
save_reg r12, 10*16 + 0*8
|
||||||
|
save_reg r13, 10*16 + 1*8
|
||||||
|
save_reg r14, 10*16 + 2*8
|
||||||
|
save_reg r15, 10*16 + 3*8
|
||||||
|
save_reg rdi, 10*16 + 4*8
|
||||||
|
save_reg rsi, 10*16 + 5*8
|
||||||
|
end_prolog
|
||||||
|
mov arg4, arg(4)
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
;;; Epilogue: reload every register from the slot FUNC_SAVE stored it in,
;;; then release the frame.
%macro FUNC_RESTORE 0
|
||||||
|
movdqa xmm6, [rsp + 0*16]
|
||||||
|
movdqa xmm7, [rsp + 1*16]
|
||||||
|
movdqa xmm8, [rsp + 2*16]
|
||||||
|
movdqa xmm9, [rsp + 3*16]
|
||||||
|
movdqa xmm10, [rsp + 4*16]
|
||||||
|
movdqa xmm11, [rsp + 5*16]
|
||||||
|
movdqa xmm12, [rsp + 6*16]
|
||||||
|
movdqa xmm13, [rsp + 7*16]
|
||||||
|
movdqa xmm14, [rsp + 8*16]
|
||||||
|
movdqa xmm15, [rsp + 9*16]
|
||||||
|
mov r12, [rsp + 10*16 + 0*8]
|
||||||
|
mov r13, [rsp + 10*16 + 1*8]
|
||||||
|
mov r14, [rsp + 10*16 + 2*8]
|
||||||
|
mov r15, [rsp + 10*16 + 3*8]
|
||||||
|
mov rdi, [rsp + 10*16 + 4*8]
|
||||||
|
mov rsi, [rsp + 10*16 + 5*8]
|
||||||
|
add rsp, stack_size
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
;;; Symbolic names for the routine's operands and scratch registers.
;;; len, vec, mul_array, src and dest are the five arguments, in the order
;;; given in the prototype comment at the top of the file.
%define len arg0
|
||||||
|
%define vec arg1
|
||||||
|
%define mul_array arg2
|
||||||
|
%define src arg3
|
||||||
|
%define dest arg4
|
||||||
|
;;; ptr is not an incoming argument: the arg5 register is used as scratch
;;; for the current source pointer (and later for a destination pointer).
%define ptr arg5
|
||||||
|
;;; vec_i: byte offset into the src pointer array (advances by PS).
%define vec_i tmp2
|
||||||
|
;;; dest1/dest2: the first two destination pointers, loaded once on entry.
%define dest1 tmp3
|
||||||
|
%define dest2 tmp4
|
||||||
|
;;; vskip1 = vec*32 and vskip3 = vec*96: byte strides between the tables.
%define vskip1 tmp5
|
||||||
|
%define vskip3 tmp6
|
||||||
|
;;; pos: current byte offset into the buffers; it lives in the return register.
%define pos return
|
||||||
|
|
||||||
|
|
||||||
|
;;; Select the instructions used to load source data (XLDR) and store
;;; results (XSTR), depending on EC_ALIGNED_ADDR and NO_NT_LDST.
%ifndef EC_ALIGNED_ADDR
|
||||||
|
;;; Use Un-aligned load/store
|
||||||
|
%define XLDR movdqu
|
||||||
|
%define XSTR movdqu
|
||||||
|
%else
|
||||||
|
;;; Use aligned load/store: plain (movdqa) if NO_NT_LDST is defined, otherwise non-temporal
|
||||||
|
%ifdef NO_NT_LDST
|
||||||
|
%define XLDR movdqa
|
||||||
|
%define XSTR movdqa
|
||||||
|
%else
|
||||||
|
%define XLDR movntdqa
|
||||||
|
%define XSTR movntdq
|
||||||
|
%endif
|
||||||
|
%endif
|
||||||
|
|
||||||
|
default rel
|
||||||
|
|
||||||
|
[bits 64]
|
||||||
|
section .text
|
||||||
|
|
||||||
|
;;; xmm register allocation.
;;; xmask0f holds the 0x0f byte mask; xgftN_lo / xgftN_hi hold the low-nibble
;;; and high-nibble halves of the N-th lookup table. Only four table pairs
;;; are named: the routine reloads xgft1_lo/xgft1_hi for the fifth table.
%define xmask0f xmm15
|
||||||
|
%define xgft1_lo xmm14
|
||||||
|
%define xgft1_hi xmm13
|
||||||
|
%define xgft2_lo xmm12
|
||||||
|
%define xgft2_hi xmm11
|
||||||
|
%define xgft3_lo xmm10
|
||||||
|
%define xgft3_hi xmm9
|
||||||
|
%define xgft4_lo xmm8
|
||||||
|
%define xgft4_hi xmm7
|
||||||
|
|
||||||
|
|
||||||
|
;;; x0: current 16 source bytes (becomes the high nibbles); xtmpa: low nibbles;
;;; xp1..xp5: the five running accumulators, one per destination.
%define x0 xmm0
|
||||||
|
%define xtmpa xmm1
|
||||||
|
%define xp1 xmm2
|
||||||
|
%define xp2 xmm3
|
||||||
|
%define xp3 xmm4
|
||||||
|
%define xp4 xmm5
|
||||||
|
%define xp5 xmm6
|
||||||
|
|
||||||
|
;;; gf_5vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; Produces five output buffers from `vec` source buffers, 16 bytes at a
;;; time. For every source byte the low and high nibble are looked up in a
;;; 32-byte table (16 bytes per nibble) with pshufb, the two partial results
;;; are xor-ed together, and the result is xor-accumulated into one of five
;;; accumulators (xp1..xp5) that are finally stored to dests[0..4].
;;;
;;; Table layout as read by this routine: the table for output k and source j
;;; starts at mul_array + k*vec*32 + j*32.
;;;
;;; Returns 0 in `return` on success, 1 if len is less than 16.
;;; The inner source loop tests its counter after the body, so vec is
;;; assumed to be at least 1.
align 16
|
||||||
|
global gf_5vect_dot_prod_sse:function
|
||||||
|
func(gf_5vect_dot_prod_sse)
|
||||||
|
FUNC_SAVE
|
||||||
|
;; From here on len holds (length - 16): the last valid 16-byte offset.
sub len, 16
|
||||||
|
jl .return_fail
|
||||||
|
xor pos, pos
|
||||||
|
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||||
|
;; vskip1 = vec*32 (stride between consecutive outputs' tables),
;; vskip3 = vec*96 (three such strides; x86 addressing cannot scale by 3).
mov vskip1, vec
|
||||||
|
imul vskip1, 32
|
||||||
|
mov vskip3, vec
|
||||||
|
imul vskip3, 96
|
||||||
|
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||||
|
mov dest1, [dest]
|
||||||
|
mov dest2, [dest+PS]
|
||||||
|
|
||||||
|
|
||||||
|
;; Outer loop: one 16-byte column of output per iteration.
.loop16:
|
||||||
|
mov tmp, mul_array
|
||||||
|
xor vec_i, vec_i
|
||||||
|
pxor xp1, xp1
|
||||||
|
pxor xp2, xp2
|
||||||
|
pxor xp3, xp3
|
||||||
|
pxor xp4, xp4
|
||||||
|
pxor xp5, xp5
|
||||||
|
|
||||||
|
|
||||||
|
;; Inner loop: fold one source buffer into all five accumulators.
.next_vect:
|
||||||
|
mov ptr, [src+vec_i]
|
||||||
|
add vec_i, PS
|
||||||
|
XLDR x0, [ptr+pos] ;Get next source vector
|
||||||
|
|
||||||
|
movdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||||
|
movdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||||
|
movdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||||
|
movdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||||
|
movdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||||
|
movdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||||
|
movdqu xgft4_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||||
|
movdqu xgft4_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||||
|
|
||||||
|
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||||
|
psraw x0, 4 ;Shift to put high nibble into bits 3-0
|
||||||
|
pand x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||||
|
pand xtmpa, xmask0f ;Mask low src nibble in bits 3-0
|
||||||
|
|
||||||
|
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||||
|
pxor xp1, xgft1_hi ;xp1 += partial
|
||||||
|
|
||||||
|
pshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
pshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
pxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||||
|
pxor xp2, xgft2_hi ;xp2 += partial
|
||||||
|
|
||||||
|
;; xgft1_lo/hi are free again (already folded into xp1): reuse them for
;; the fifth table, loaded before tmp advances to the next source's tables.
movdqu xgft1_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||||
|
movdqu xgft1_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||||
|
add tmp, 32
|
||||||
|
|
||||||
|
pshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
pshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
pxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||||
|
pxor xp3, xgft3_hi ;xp3 += partial
|
||||||
|
|
||||||
|
pshufb xgft4_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
pshufb xgft4_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
pxor xgft4_hi, xgft4_lo ;GF add high and low partials
|
||||||
|
pxor xp4, xgft4_hi ;xp4 += partial
|
||||||
|
|
||||||
|
pshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
pshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
pxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||||
|
pxor xp5, xgft1_hi ;xp5 += partial
|
||||||
|
|
||||||
|
cmp vec_i, vec
|
||||||
|
jl .next_vect
|
||||||
|
|
||||||
|
;; tmp, ptr and vec_i are dead until the next .loop16 pass re-initialises
;; them, so they carry the third, fourth and fifth destination pointers.
mov tmp, [dest+2*PS]
|
||||||
|
mov ptr, [dest+3*PS]
|
||||||
|
mov vec_i, [dest+4*PS]
|
||||||
|
|
||||||
|
XSTR [dest1+pos], xp1
|
||||||
|
XSTR [dest2+pos], xp2
|
||||||
|
XSTR [tmp+pos], xp3
|
||||||
|
XSTR [ptr+pos], xp4
|
||||||
|
XSTR [vec_i+pos], xp5
|
||||||
|
|
||||||
|
add pos, 16 ;Loop on 16 bytes at a time
|
||||||
|
cmp pos, len
|
||||||
|
jle .loop16
|
||||||
|
|
||||||
|
;; pos == original length means every byte has been produced.
lea tmp, [len + 16]
|
||||||
|
cmp pos, tmp
|
||||||
|
je .return_pass
|
||||||
|
|
||||||
|
;; Tail len
;; Length is not a multiple of 16: redo the final 16 bytes at offset
;; length-16, recomputing some bytes that were already written.
;; NOTE(review): with EC_ALIGNED_ADDR this offset is only 16-byte aligned
;; when the length is a multiple of 16 - confirm callers guarantee that.
|
||||||
|
mov pos, len ;Overlapped offset length-16
|
||||||
|
jmp .loop16 ;Do one more overlap pass
|
||||||
|
|
||||||
|
.return_pass:
|
||||||
|
FUNC_RESTORE
|
||||||
|
mov return, 0
|
||||||
|
ret
|
||||||
|
|
||||||
|
;; Reached when len < 16: nothing is read or written.
.return_fail:
|
||||||
|
FUNC_RESTORE
|
||||||
|
mov return, 1
|
||||||
|
ret
|
||||||
|
|
||||||
|
endproc_frame
|
||||||
|
|
||||||
|
section .data
|
||||||
|
|
||||||
|
;;; mask0f: sixteen 0x0f bytes, used to isolate one nibble per byte.
;;; The routine loads it with movdqa, hence the 16-byte alignment.
align 16
|
||||||
|
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||||
|
|
||||||
|
;;; slversion func, core, ver, snum
;;; Emits a small version record in the current section: two global labels,
;;; <func>_slver and <func>_slver_<core><ver><snum>, both pointing at a
;;; 16-bit word 0x<snum> followed by the bytes 0x<ver> and 0x<core>.
%macro slversion 4
|
||||||
|
global %1_slver_%2%3%4
|
||||||
|
global %1_slver
|
||||||
|
%1_slver:
|
||||||
|
%1_slver_%2%3%4:
|
||||||
|
dw 0x%4
|
||||||
|
db 0x%3, 0x%2
|
||||||
|
%endmacro
|
||||||
|
;;; func core, ver, snum
|
||||||
|
slversion gf_5vect_dot_prod_sse, 00, 03, 0065
|
323
erasure/src/gf-6vect-dot-prod-avx.asm
Normal file
323
erasure/src/gf-6vect-dot-prod-avx.asm
Normal file
@ -0,0 +1,323 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_6vect_dot_prod_avx(len, vec, *g_tbls, **buffs, **dests);
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define tmp5 r14 ; must be saved and restored
|
||||||
|
%define tmp6 r15 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
;; FUNC_SAVE (elf64): push r12-r15, the registers aliased by tmp3..tmp6 above.
;; FUNC_RESTORE pops them in the reverse order.
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
%endmacro
|
||||||
|
;; FUNC_RESTORE (elf64): pop r15..r12, mirroring the push order of FUNC_SAVE.
%macro FUNC_RESTORE 0
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
%define arg0 rcx
|
||||||
|
%define arg1 rdx
|
||||||
|
%define arg2 r8
|
||||||
|
%define arg3 r9
|
||||||
|
|
||||||
|
%define arg4 r12 ; must be saved, loaded and restored
|
||||||
|
%define arg5 r15 ; must be saved and restored
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r14 ; must be saved and restored
|
||||||
|
%define tmp5 rdi ; must be saved and restored
|
||||||
|
%define tmp6 rsi ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
|
||||||
|
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||||
|
|
||||||
|
%define func(x) proc_frame x
|
||||||
|
;; FUNC_SAVE (win64): reserve stack_size bytes, save xmm6-xmm15 in the first
;; ten 16-byte slots and r12-r15, rdi, rsi in the 8-byte slots that follow,
;; then load the fifth argument from the caller's stack into arg4.
%macro FUNC_SAVE 0
|
||||||
|
alloc_stack stack_size
|
||||||
|
save_xmm128 xmm6, 0*16
|
||||||
|
save_xmm128 xmm7, 1*16
|
||||||
|
save_xmm128 xmm8, 2*16
|
||||||
|
save_xmm128 xmm9, 3*16
|
||||||
|
save_xmm128 xmm10, 4*16
|
||||||
|
save_xmm128 xmm11, 5*16
|
||||||
|
save_xmm128 xmm12, 6*16
|
||||||
|
save_xmm128 xmm13, 7*16
|
||||||
|
save_xmm128 xmm14, 8*16
|
||||||
|
save_xmm128 xmm15, 9*16
|
||||||
|
save_reg r12, 10*16 + 0*8
|
||||||
|
save_reg r13, 10*16 + 1*8
|
||||||
|
save_reg r14, 10*16 + 2*8
|
||||||
|
save_reg r15, 10*16 + 3*8
|
||||||
|
save_reg rdi, 10*16 + 4*8
|
||||||
|
save_reg rsi, 10*16 + 5*8
|
||||||
|
end_prolog
|
||||||
|
mov arg4, arg(4) ;arg(4) = [rsp + stack_size + PS + PS*4]
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
;; FUNC_RESTORE (win64): reload xmm6-xmm15 and r12-r15, rdi, rsi from the
;; slots written by FUNC_SAVE, then release the stack_size-byte frame.
%macro FUNC_RESTORE 0
|
||||||
|
vmovdqa xmm6, [rsp + 0*16]
|
||||||
|
vmovdqa xmm7, [rsp + 1*16]
|
||||||
|
vmovdqa xmm8, [rsp + 2*16]
|
||||||
|
vmovdqa xmm9, [rsp + 3*16]
|
||||||
|
vmovdqa xmm10, [rsp + 4*16]
|
||||||
|
vmovdqa xmm11, [rsp + 5*16]
|
||||||
|
vmovdqa xmm12, [rsp + 6*16]
|
||||||
|
vmovdqa xmm13, [rsp + 7*16]
|
||||||
|
vmovdqa xmm14, [rsp + 8*16]
|
||||||
|
vmovdqa xmm15, [rsp + 9*16]
|
||||||
|
mov r12, [rsp + 10*16 + 0*8]
|
||||||
|
mov r13, [rsp + 10*16 + 1*8]
|
||||||
|
mov r14, [rsp + 10*16 + 2*8]
|
||||||
|
mov r15, [rsp + 10*16 + 3*8]
|
||||||
|
mov rdi, [rsp + 10*16 + 4*8]
|
||||||
|
mov rsi, [rsp + 10*16 + 5*8]
|
||||||
|
add rsp, stack_size ;Release the frame allocated by FUNC_SAVE
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%define len arg0
|
||||||
|
%define vec arg1
|
||||||
|
%define mul_array arg2
|
||||||
|
%define src arg3
|
||||||
|
%define dest arg4
|
||||||
|
%define ptr arg5
|
||||||
|
%define vec_i tmp2
|
||||||
|
%define dest1 tmp3
|
||||||
|
%define dest2 tmp4
|
||||||
|
%define vskip1 tmp5
|
||||||
|
%define vskip3 tmp6
|
||||||
|
%define pos return
|
||||||
|
|
||||||
|
|
||||||
|
%ifndef EC_ALIGNED_ADDR
|
||||||
|
;;; Use Un-aligned load/store
|
||||||
|
%define XLDR vmovdqu
|
||||||
|
%define XSTR vmovdqu
|
||||||
|
%else
|
||||||
|
;;; Use Non-temporal load/store
|
||||||
|
%ifdef NO_NT_LDST
|
||||||
|
%define XLDR vmovdqa
|
||||||
|
%define XSTR vmovdqa
|
||||||
|
%else
|
||||||
|
%define XLDR vmovntdqa
|
||||||
|
%define XSTR vmovntdq
|
||||||
|
%endif
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
default rel
|
||||||
|
|
||||||
|
[bits 64]
|
||||||
|
section .text
|
||||||
|
|
||||||
|
%define xmask0f xmm15
|
||||||
|
%define xgft1_lo xmm14
|
||||||
|
%define xgft1_hi xmm13
|
||||||
|
%define xgft2_lo xmm12
|
||||||
|
%define xgft2_hi xmm11
|
||||||
|
%define xgft3_lo xmm10
|
||||||
|
%define xgft3_hi xmm9
|
||||||
|
%define x0 xmm0
|
||||||
|
%define xtmpa xmm1
|
||||||
|
%define xp1 xmm2
|
||||||
|
%define xp2 xmm3
|
||||||
|
%define xp3 xmm4
|
||||||
|
%define xp4 xmm5
|
||||||
|
%define xp5 xmm6
|
||||||
|
%define xp6 xmm7
|
||||||
|
|
||||||
|
align 16
|
||||||
|
global gf_6vect_dot_prod_avx:function
|
||||||
|
;; gf_6vect_dot_prod_avx(len, vec, mul_array, src, dest)
;; For every 16-byte position, each of the `vec` source buffers is split into
;; low/high nibbles, looked up in six pairs of 16-byte tables, and the results
;; are XORed into the accumulators xp1..xp6, which are then stored through the
;; six pointers read from dest[0..5].
;; The tables for output k (0..5) and source j start at
;; mul_array + k*vec*32 + j*32: low-nibble table first, high-nibble table at +16.
;; Returns 0 in rax on success and 1 when len < 16. When len is not a multiple
;; of 16 the last 16 bytes are processed by one extra, overlapping pass.
func(gf_6vect_dot_prod_avx)
|
||||||
|
FUNC_SAVE
|
||||||
|
sub len, 16 ;len = length-16, the last offset a full 16-byte pass may start at
|
||||||
|
jl .return_fail ;Fewer than 16 bytes: return 1
|
||||||
|
xor pos, pos ;pos = byte offset into the source/dest buffers
|
||||||
|
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||||
|
mov vskip1, vec
|
||||||
|
imul vskip1, 32 ;vskip1 = vec*32 = table bytes per output
|
||||||
|
mov vskip3, vec
|
||||||
|
imul vskip3, 96 ;vskip3 = vec*96 = 3*vskip1
|
||||||
|
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||||
|
mov dest1, [dest] ;dest1 = dest[0]
|
||||||
|
mov dest2, [dest+PS] ;dest2 = dest[1]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
.loop16: ;One pass per 16-byte block at offset pos
|
||||||
|
mov tmp, mul_array ;tmp walks the tables, 32 bytes per source
|
||||||
|
xor vec_i, vec_i ;vec_i = byte index into the src pointer array
|
||||||
|
vpxor xp1, xp1 ;Clear the six accumulators
|
||||||
|
vpxor xp2, xp2
|
||||||
|
vpxor xp3, xp3
|
||||||
|
vpxor xp4, xp4
|
||||||
|
vpxor xp5, xp5
|
||||||
|
vpxor xp6, xp6
|
||||||
|
|
||||||
|
.next_vect: ;Body runs once before vec_i is tested against vec
|
||||||
|
mov ptr, [src+vec_i] ;ptr = current source buffer
|
||||||
|
add vec_i, PS
|
||||||
|
XLDR x0, [ptr+pos] ;Get next source vector
|
||||||
|
|
||||||
|
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||||
|
vmovdqu xgft1_hi, [tmp+16] ; " Ax{00}, Ax{10}, ..., Ax{f0}
|
||||||
|
vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||||
|
vmovdqu xgft2_hi, [tmp+vskip1*1+16] ; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||||
|
vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||||
|
vmovdqu xgft3_hi, [tmp+vskip1*2+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||||
|
lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5 (source pointer no longer needed)
|
||||||
|
|
||||||
|
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0
|
||||||
|
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0
|
||||||
|
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||||
|
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||||
|
|
||||||
|
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||||
|
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||||
|
|
||||||
|
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||||
|
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
vmovdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||||
|
vmovdqu xgft1_hi, [tmp+vskip3+16] ; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||||
|
vmovdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||||
|
vmovdqu xgft2_hi, [tmp+vskip1*4+16] ; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||||
|
vmovdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
|
||||||
|
vmovdqu xgft3_hi, [tmp+ptr+16] ; " Fx{00}, Fx{10}, ..., Fx{f0}
|
||||||
|
add tmp, 32 ;Advance to the next source's tables
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||||
|
vpxor xp4, xgft1_hi ;xp4 += partial
|
||||||
|
|
||||||
|
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||||
|
vpxor xp5, xgft2_hi ;xp5 += partial
|
||||||
|
|
||||||
|
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||||
|
vpxor xp6, xgft3_hi ;xp6 += partial
|
||||||
|
|
||||||
|
cmp vec_i, vec
|
||||||
|
jl .next_vect ;Loop until vec_i reaches vec*PS
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
mov tmp, [dest+2*PS] ;tmp, ptr and vec_i are reused as dest[2], dest[3], dest[4]
|
||||||
|
mov ptr, [dest+3*PS]
|
||||||
|
mov vec_i, [dest+4*PS]
|
||||||
|
|
||||||
|
XSTR [dest1+pos], xp1
|
||||||
|
XSTR [dest2+pos], xp2
|
||||||
|
XSTR [tmp+pos], xp3
|
||||||
|
mov tmp, [dest+5*PS] ;tmp reused again, now as dest[5]
|
||||||
|
XSTR [ptr+pos], xp4
|
||||||
|
XSTR [vec_i+pos], xp5
|
||||||
|
XSTR [tmp+pos], xp6
|
||||||
|
|
||||||
|
add pos, 16 ;Loop on 16 bytes at a time
|
||||||
|
cmp pos, len
|
||||||
|
jle .loop16 ;Continue while pos <= length-16
|
||||||
|
|
||||||
|
lea tmp, [len + 16] ;tmp = original length
|
||||||
|
cmp pos, tmp
|
||||||
|
je .return_pass ;pos == length: every byte has been written
|
||||||
|
|
||||||
|
;; Tail len
|
||||||
|
mov pos, len ;Overlapped offset length-16
|
||||||
|
jmp .loop16 ;Do one more overlap pass
|
||||||
|
|
||||||
|
.return_pass:
|
||||||
|
FUNC_RESTORE
|
||||||
|
mov return, 0 ;Success
|
||||||
|
ret
|
||||||
|
|
||||||
|
.return_fail:
|
||||||
|
FUNC_RESTORE
|
||||||
|
mov return, 1 ;Failure: len was less than 16
|
||||||
|
ret
|
||||||
|
|
||||||
|
endproc_frame
|
||||||
|
|
||||||
|
section .data
|
||||||
|
|
||||||
|
align 16
|
||||||
|
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||||
|
|
||||||
|
;; slversion func, core, ver, snum
;; Emits a 4-byte version stamp (word 0x<snum>, then bytes 0x<ver>, 0x<core>)
;; and exports two global labels for it: <func>_slver and <func>_slver_<core><ver><snum>.
%macro slversion 4
|
||||||
|
global %1_slver_%2%3%4
|
||||||
|
global %1_slver
|
||||||
|
%1_slver:
|
||||||
|
%1_slver_%2%3%4:
|
||||||
|
dw 0x%4
|
||||||
|
db 0x%3, 0x%2
|
||||||
|
%endmacro
|
||||||
|
;;; func core, ver, snum
|
||||||
|
slversion gf_6vect_dot_prod_avx, 02, 03, 0195
|
334
erasure/src/gf-6vect-dot-prod-avx2.asm
Normal file
334
erasure/src/gf-6vect-dot-prod-avx2.asm
Normal file
@ -0,0 +1,334 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_6vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, **dests);
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp.w r11d
|
||||||
|
%define tmp.b r11b
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r12 ; must be saved and restored
|
||||||
|
%define tmp5 r14 ; must be saved and restored
|
||||||
|
%define tmp6 r15 ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
|
||||||
|
%define func(x) x:
|
||||||
|
;; FUNC_SAVE (elf64): push r12-r15, the registers aliased by tmp3..tmp6 above.
;; FUNC_RESTORE pops them in the reverse order.
%macro FUNC_SAVE 0
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
%endmacro
|
||||||
|
;; FUNC_RESTORE (elf64): pop r15..r12, mirroring the push order of FUNC_SAVE.
%macro FUNC_RESTORE 0
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
%define arg0 rcx
|
||||||
|
%define arg1 rdx
|
||||||
|
%define arg2 r8
|
||||||
|
%define arg3 r9
|
||||||
|
|
||||||
|
%define arg4 r12 ; must be saved, loaded and restored
|
||||||
|
%define arg5 r15 ; must be saved and restored
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp.w r11d
|
||||||
|
%define tmp.b r11b
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r13 ; must be saved and restored
|
||||||
|
%define tmp4 r14 ; must be saved and restored
|
||||||
|
%define tmp5 rdi ; must be saved and restored
|
||||||
|
%define tmp6 rsi ; must be saved and restored
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define LOG_PS 3
|
||||||
|
%define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
|
||||||
|
%define arg(x) [rsp + stack_size + PS + PS*x]
|
||||||
|
|
||||||
|
%define func(x) proc_frame x
|
||||||
|
;; FUNC_SAVE (win64): reserve stack_size bytes, store xmm6-xmm15 in the first
;; ten 16-byte slots (plain vmovdqa stores here) and r12-r15, rdi, rsi in the
;; 8-byte slots that follow, then load the fifth argument from the caller's
;; stack into arg4.
%macro FUNC_SAVE 0
|
||||||
|
alloc_stack stack_size
|
||||||
|
vmovdqa [rsp + 0*16], xmm6
|
||||||
|
vmovdqa [rsp + 1*16], xmm7
|
||||||
|
vmovdqa [rsp + 2*16], xmm8
|
||||||
|
vmovdqa [rsp + 3*16], xmm9
|
||||||
|
vmovdqa [rsp + 4*16], xmm10
|
||||||
|
vmovdqa [rsp + 5*16], xmm11
|
||||||
|
vmovdqa [rsp + 6*16], xmm12
|
||||||
|
vmovdqa [rsp + 7*16], xmm13
|
||||||
|
vmovdqa [rsp + 8*16], xmm14
|
||||||
|
vmovdqa [rsp + 9*16], xmm15
|
||||||
|
save_reg r12, 10*16 + 0*8
|
||||||
|
save_reg r13, 10*16 + 1*8
|
||||||
|
save_reg r14, 10*16 + 2*8
|
||||||
|
save_reg r15, 10*16 + 3*8
|
||||||
|
save_reg rdi, 10*16 + 4*8
|
||||||
|
save_reg rsi, 10*16 + 5*8
|
||||||
|
end_prolog
|
||||||
|
mov arg4, arg(4) ;arg(4) = [rsp + stack_size + PS + PS*4]
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
;; FUNC_RESTORE (win64): reload xmm6-xmm15 and r12-r15, rdi, rsi from the
;; slots written by FUNC_SAVE, then release the stack_size-byte frame.
%macro FUNC_RESTORE 0
|
||||||
|
vmovdqa xmm6, [rsp + 0*16]
|
||||||
|
vmovdqa xmm7, [rsp + 1*16]
|
||||||
|
vmovdqa xmm8, [rsp + 2*16]
|
||||||
|
vmovdqa xmm9, [rsp + 3*16]
|
||||||
|
vmovdqa xmm10, [rsp + 4*16]
|
||||||
|
vmovdqa xmm11, [rsp + 5*16]
|
||||||
|
vmovdqa xmm12, [rsp + 6*16]
|
||||||
|
vmovdqa xmm13, [rsp + 7*16]
|
||||||
|
vmovdqa xmm14, [rsp + 8*16]
|
||||||
|
vmovdqa xmm15, [rsp + 9*16]
|
||||||
|
mov r12, [rsp + 10*16 + 0*8]
|
||||||
|
mov r13, [rsp + 10*16 + 1*8]
|
||||||
|
mov r14, [rsp + 10*16 + 2*8]
|
||||||
|
mov r15, [rsp + 10*16 + 3*8]
|
||||||
|
mov rdi, [rsp + 10*16 + 4*8]
|
||||||
|
mov rsi, [rsp + 10*16 + 5*8]
|
||||||
|
add rsp, stack_size ;Release the frame allocated by FUNC_SAVE
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%define len arg0
|
||||||
|
%define vec arg1
|
||||||
|
%define mul_array arg2
|
||||||
|
%define src arg3
|
||||||
|
%define dest arg4
|
||||||
|
%define ptr arg5
|
||||||
|
%define vec_i tmp2
|
||||||
|
%define dest1 tmp3
|
||||||
|
%define dest2 tmp4
|
||||||
|
%define vskip1 tmp5
|
||||||
|
%define vskip3 tmp6
|
||||||
|
%define pos return
|
||||||
|
|
||||||
|
|
||||||
|
%ifndef EC_ALIGNED_ADDR
|
||||||
|
;;; Use Un-aligned load/store
|
||||||
|
%define XLDR vmovdqu
|
||||||
|
%define XSTR vmovdqu
|
||||||
|
%else
|
||||||
|
;;; Use Non-temporal load/store
|
||||||
|
%ifdef NO_NT_LDST
|
||||||
|
%define XLDR vmovdqa
|
||||||
|
%define XSTR vmovdqa
|
||||||
|
%else
|
||||||
|
%define XLDR vmovntdqa
|
||||||
|
%define XSTR vmovntdq
|
||||||
|
%endif
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
default rel
|
||||||
|
|
||||||
|
[bits 64]
|
||||||
|
section .text
|
||||||
|
|
||||||
|
%define xmask0f ymm15
|
||||||
|
%define xmask0fx xmm15
|
||||||
|
%define xgft1_lo ymm14
|
||||||
|
%define xgft1_hi ymm13
|
||||||
|
%define xgft2_lo ymm12
|
||||||
|
%define xgft2_hi ymm11
|
||||||
|
%define xgft3_lo ymm10
|
||||||
|
%define xgft3_hi ymm9
|
||||||
|
%define x0 ymm0
|
||||||
|
%define xtmpa ymm1
|
||||||
|
%define xp1 ymm2
|
||||||
|
%define xp2 ymm3
|
||||||
|
%define xp3 ymm4
|
||||||
|
%define xp4 ymm5
|
||||||
|
%define xp5 ymm6
|
||||||
|
%define xp6 ymm7
|
||||||
|
|
||||||
|
align 16
|
||||||
|
global gf_6vect_dot_prod_avx2:function
|
||||||
|
;; gf_6vect_dot_prod_avx2(len, vec, mul_array, src, dest)
;; 32-byte (ymm) variant: for every 32-byte position, each of the `vec` source
;; buffers is split into low/high nibbles, looked up in six 32-byte table pairs,
;; and the results are XORed into xp1..xp6, which are then stored through the
;; six pointers read from dest[0..5].
;; The tables for output k (0..5) and source j start at
;; mul_array + k*vec*32 + j*32; one 32-byte load brings in both the low-nibble
;; table (lower lane) and the high-nibble table (upper lane).
;; Returns 0 in rax on success and 1 when len < 32. When len is not a multiple
;; of 32 the last 32 bytes are processed by one extra, overlapping pass.
func(gf_6vect_dot_prod_avx2)
|
||||||
|
FUNC_SAVE
|
||||||
|
sub len, 32 ;len = length-32, the last offset a full 32-byte pass may start at
|
||||||
|
jl .return_fail ;Fewer than 32 bytes: return 1
|
||||||
|
xor pos, pos ;pos = byte offset into the source/dest buffers
|
||||||
|
mov tmp.b, 0x0f
|
||||||
|
vpinsrb xmask0fx, xmask0fx, tmp.w, 0 ;Put 0x0f into byte 0 of the mask register
|
||||||
|
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||||
|
mov vskip1, vec
|
||||||
|
imul vskip1, 32 ;vskip1 = vec*32 = table bytes per output
|
||||||
|
mov vskip3, vec
|
||||||
|
imul vskip3, 96 ;vskip3 = vec*96 = 3*vskip1
|
||||||
|
sal vec, LOG_PS ;vec *= PS. Make vec_i count by PS
|
||||||
|
mov dest1, [dest] ;dest1 = dest[0]
|
||||||
|
mov dest2, [dest+PS] ;dest2 = dest[1]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
.loop32: ;One pass per 32-byte block at offset pos
|
||||||
|
mov tmp, mul_array ;tmp walks the tables, 32 bytes per source
|
||||||
|
xor vec_i, vec_i ;vec_i = byte index into the src pointer array
|
||||||
|
vpxor xp1, xp1 ;Clear the six accumulators
|
||||||
|
vpxor xp2, xp2
|
||||||
|
vpxor xp3, xp3
|
||||||
|
vpxor xp4, xp4
|
||||||
|
vpxor xp5, xp5
|
||||||
|
vpxor xp6, xp6
|
||||||
|
|
||||||
|
.next_vect: ;Body runs once before vec_i is tested against vec
|
||||||
|
mov ptr, [src+vec_i] ;ptr = current source buffer
|
||||||
|
XLDR x0, [ptr+pos] ;Get next source vector
|
||||||
|
add vec_i, PS
|
||||||
|
|
||||||
|
vpand xgft3_lo, x0, xmask0f ;Mask low src nibble in bits 3-0 (xgft3_lo used as scratch)
|
||||||
|
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0
|
||||||
|
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||||
|
vperm2i128 xtmpa, xgft3_lo, x0, 0x30 ;swap xtmpa from 1lo|2lo to 1lo|2hi
|
||||||
|
vperm2i128 x0, xgft3_lo, x0, 0x12 ;swap x0 from 1hi|2hi to 1hi|2lo
|
||||||
|
|
||||||
|
vmovdqu xgft1_lo, [tmp] ;Load array Ax{00}, Ax{01}, ..., Ax{0f}
|
||||||
|
; " Ax{00}, Ax{10}, ..., Ax{f0} (upper 16 bytes of the same 32-byte load)
|
||||||
|
vmovdqu xgft2_lo, [tmp+vskip1*1] ;Load array Bx{00}, Bx{01}, ..., Bx{0f}
|
||||||
|
; " Bx{00}, Bx{10}, ..., Bx{f0}
|
||||||
|
vmovdqu xgft3_lo, [tmp+vskip1*2] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||||
|
; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||||
|
lea ptr, [vskip1 + vskip1*4] ;ptr = vskip5 (source pointer no longer needed)
|
||||||
|
|
||||||
|
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||||
|
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
|
||||||
|
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
|
||||||
|
|
||||||
|
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||||
|
vpxor xp1, xgft1_hi ;xp1 += partial
|
||||||
|
|
||||||
|
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||||
|
vpxor xp2, xgft2_hi ;xp2 += partial
|
||||||
|
|
||||||
|
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||||
|
vpxor xp3, xgft3_hi ;xp3 += partial
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
vmovdqu xgft1_lo, [tmp+vskip3] ;Load array Dx{00}, Dx{01}, ..., Dx{0f}
|
||||||
|
; " Dx{00}, Dx{10}, ..., Dx{f0}
|
||||||
|
vmovdqu xgft2_lo, [tmp+vskip1*4] ;Load array Ex{00}, Ex{01}, ..., Ex{0f}
|
||||||
|
; " Ex{00}, Ex{10}, ..., Ex{f0}
|
||||||
|
vmovdqu xgft3_lo, [tmp+ptr] ;Load array Fx{00}, Fx{01}, ..., Fx{0f}
|
||||||
|
; " Fx{00}, Fx{10}, ..., Fx{f0}
|
||||||
|
add tmp, 32 ;Advance to the next source's tables
|
||||||
|
vperm2i128 xgft1_hi, xgft1_lo, xgft1_lo, 0x01 ; swapped to hi | lo
|
||||||
|
vperm2i128 xgft2_hi, xgft2_lo, xgft2_lo, 0x01 ; swapped to hi | lo
|
||||||
|
vperm2i128 xgft3_hi, xgft3_lo, xgft3_lo, 0x01 ; swapped to hi | lo
|
||||||
|
|
||||||
|
vpshufb xgft1_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft1_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft1_hi, xgft1_lo ;GF add high and low partials
|
||||||
|
vpxor xp4, xgft1_hi ;xp4 += partial
|
||||||
|
|
||||||
|
vpshufb xgft2_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft2_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft2_hi, xgft2_lo ;GF add high and low partials
|
||||||
|
vpxor xp5, xgft2_hi ;xp5 += partial
|
||||||
|
|
||||||
|
vpshufb xgft3_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft3_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft3_hi, xgft3_lo ;GF add high and low partials
|
||||||
|
vpxor xp6, xgft3_hi ;xp6 += partial
|
||||||
|
|
||||||
|
cmp vec_i, vec
|
||||||
|
jl .next_vect ;Loop until vec_i reaches vec*PS
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
mov tmp, [dest+2*PS] ;tmp, ptr and vec_i are reused as dest[2], dest[3], dest[4]
|
||||||
|
mov ptr, [dest+3*PS]
|
||||||
|
mov vec_i, [dest+4*PS]
|
||||||
|
|
||||||
|
XSTR [dest1+pos], xp1
|
||||||
|
XSTR [dest2+pos], xp2
|
||||||
|
XSTR [tmp+pos], xp3
|
||||||
|
mov tmp, [dest+5*PS] ;tmp reused again, now as dest[5]
|
||||||
|
XSTR [ptr+pos], xp4
|
||||||
|
XSTR [vec_i+pos], xp5
|
||||||
|
XSTR [tmp+pos], xp6
|
||||||
|
|
||||||
|
add pos, 32 ;Loop on 32 bytes at a time
|
||||||
|
cmp pos, len
|
||||||
|
jle .loop32 ;Continue while pos <= length-32
|
||||||
|
|
||||||
|
lea tmp, [len + 32] ;tmp = original length
|
||||||
|
cmp pos, tmp
|
||||||
|
je .return_pass ;pos == length: every byte has been written
|
||||||
|
|
||||||
|
;; Tail len
|
||||||
|
mov pos, len ;Overlapped offset length-32
|
||||||
|
jmp .loop32 ;Do one more overlap pass
|
||||||
|
|
||||||
|
.return_pass:
|
||||||
|
FUNC_RESTORE
|
||||||
|
mov return, 0 ;Success
|
||||||
|
ret
|
||||||
|
|
||||||
|
.return_fail:
|
||||||
|
FUNC_RESTORE
|
||||||
|
mov return, 1 ;Failure: len was less than 32
|
||||||
|
ret
|
||||||
|
|
||||||
|
endproc_frame
|
||||||
|
|
||||||
|
section .data
|
||||||
|
|
||||||
|
;; slversion func, core, ver, snum
;; Emits a 4-byte version stamp (word 0x<snum>, then bytes 0x<ver>, 0x<core>)
;; and exports two global labels for it: <func>_slver and <func>_slver_<core><ver><snum>.
%macro slversion 4
|
||||||
|
global %1_slver_%2%3%4
|
||||||
|
global %1_slver
|
||||||
|
%1_slver:
|
||||||
|
%1_slver_%2%3%4:
|
||||||
|
dw 0x%4
|
||||||
|
db 0x%3, 0x%2
|
||||||
|
%endmacro
|
||||||
|
;;; func core, ver, snum
|
||||||
|
slversion gf_6vect_dot_prod_avx2, 04, 03, 019a
|
352
erasure/src/gf-6vect-dot-prod-sse-perf.c
Normal file
352
erasure/src/gf-6vect-dot-prod-sse-perf.c
Normal file
@ -0,0 +1,352 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/tests.h"
|
||||||
|
|
||||||
|
#ifndef FUNCTION_UNDER_TEST
|
||||||
|
# define FUNCTION_UNDER_TEST gf_6vect_dot_prod_sse
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define str(s) #s
|
||||||
|
#define xstr(s) str(s)
|
||||||
|
|
||||||
|
//#define CACHED_TEST
|
||||||
|
#ifdef CACHED_TEST
|
||||||
|
// Cached test, loop many times over small dataset
|
||||||
|
# define TEST_SOURCES 10
|
||||||
|
# define TEST_LEN 8*1024
|
||||||
|
# define TEST_LOOPS 40000
|
||||||
|
# define TEST_TYPE_STR "_warm"
|
||||||
|
#else
|
||||||
|
# ifndef TEST_CUSTOM
|
||||||
|
// Uncached test. Pull from large mem base.
|
||||||
|
# define TEST_SOURCES 10
|
||||||
|
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
|
||||||
|
# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
|
||||||
|
# define TEST_LOOPS 100
|
||||||
|
# define TEST_TYPE_STR "_cold"
|
||||||
|
# else
|
||||||
|
# define TEST_TYPE_STR "_cus"
|
||||||
|
# ifndef TEST_LOOPS
|
||||||
|
# define TEST_LOOPS 1000
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef unsigned char u8;
|
||||||
|
|
||||||
|
/*
 * Print the first len bytes of buf to stdout as hex, each as " %2x".
 * A newline is emitted after every 32nd byte, and one more newline is
 * always written at the end.
 */
void dump(unsigned char *buf, int len)
{
	int printed = 0;

	while (printed < len) {
		printf(" %2x", buf[printed] & 0xff);
		printed++;
		if ((printed % 32) == 0) {
			printf("\n");
		}
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print a k-row by m-column byte matrix to stdout, one row per line with
 * each element formatted as " %2x", followed by a terminating blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
void *buf;
|
||||||
|
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
|
||||||
|
u8 g4[TEST_SOURCES], g5[TEST_SOURCES], g6[TEST_SOURCES], *g_tbls;
|
||||||
|
u8 *dest1, *dest2, *dest3, *dest4, *dest5, *dest6, *dest_ref1;
|
||||||
|
u8 *dest_ref2, *dest_ref3, *dest_ref4, *dest_ref5, *dest_ref6;
|
||||||
|
u8 *dest_ptrs[6], *buffs[TEST_SOURCES];
|
||||||
|
struct perf start, stop;
|
||||||
|
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 16, 6 * TEST_SOURCES * 32)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
g_tbls = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest2 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest3 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest4 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest5 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest6 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref2 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref3 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref4 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref5 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref6 = buf;
|
||||||
|
|
||||||
|
dest_ptrs[0] = dest1;
|
||||||
|
dest_ptrs[1] = dest2;
|
||||||
|
dest_ptrs[2] = dest3;
|
||||||
|
dest_ptrs[3] = dest4;
|
||||||
|
dest_ptrs[4] = dest5;
|
||||||
|
dest_ptrs[5] = dest6;
|
||||||
|
|
||||||
|
// Performance test
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
memset(dest1, 0, TEST_LEN);
|
||||||
|
memset(dest2, 0, TEST_LEN);
|
||||||
|
memset(dest3, 0, TEST_LEN);
|
||||||
|
memset(dest4, 0, TEST_LEN);
|
||||||
|
memset(dest5, 0, TEST_LEN);
|
||||||
|
memset(dest6, 0, TEST_LEN);
|
||||||
|
memset(dest_ref1, 0, TEST_LEN);
|
||||||
|
memset(dest_ref2, 0, TEST_LEN);
|
||||||
|
memset(dest_ref3, 0, TEST_LEN);
|
||||||
|
memset(dest_ref4, 0, TEST_LEN);
|
||||||
|
memset(dest_ref5, 0, TEST_LEN);
|
||||||
|
memset(dest_ref6, 0, TEST_LEN);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
g4[i] = rand();
|
||||||
|
g5[i] = rand();
|
||||||
|
g6[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++) {
|
||||||
|
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||||
|
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g6[j], &g_tbls[(160 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref4);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref5);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref6);
|
||||||
|
|
||||||
|
#ifdef DO_REF_PERF
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS / 20; i++) {
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++) {
|
||||||
|
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||||
|
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g6[j], &g_tbls[(160 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref4);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref5);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref6);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf("gf_6vect_dot_prod_base" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 6) * i);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS; i++) {
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++) {
|
||||||
|
gf_vect_mul_init(g1[j], &g_tbls[j * 32]);
|
||||||
|
gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
gf_vect_mul_init(g6[j], &g_tbls[(160 * TEST_SOURCES) + (j * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 6) * i);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test3\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test4\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest4, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test5\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref5, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest5, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) {
|
||||||
|
printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test6\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref6, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest6, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("pass perf check\n");
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
}
|
911
erasure/src/gf-6vect-dot-prod-sse-test.c
Normal file
911
erasure/src/gf-6vect-dot-prod-sse-test.c
Normal file
@ -0,0 +1,911 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/types.h"
|
||||||
|
|
||||||
|
#ifndef FUNCTION_UNDER_TEST
|
||||||
|
# define FUNCTION_UNDER_TEST gf_6vect_dot_prod_sse
|
||||||
|
#endif
|
||||||
|
#ifndef TEST_MIN_SIZE
|
||||||
|
# define TEST_MIN_SIZE 16
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define str(s) #s
|
||||||
|
#define xstr(s) str(s)
|
||||||
|
|
||||||
|
#define TEST_LEN 8192
|
||||||
|
#define TEST_SIZE (TEST_LEN/2)
|
||||||
|
#define TEST_MEM TEST_SIZE
|
||||||
|
#define TEST_LOOPS 20000
|
||||||
|
#define TEST_TYPE_STR ""
|
||||||
|
|
||||||
|
#ifndef TEST_SOURCES
|
||||||
|
# define TEST_SOURCES 16
|
||||||
|
#endif
|
||||||
|
#ifndef RANDOMS
|
||||||
|
# define RANDOMS 20
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef EC_ALIGNED_ADDR
|
||||||
|
// Define power of 2 range to check ptr, len alignment
|
||||||
|
# define PTR_ALIGN_CHK_B 0
|
||||||
|
# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
|
||||||
|
#else
|
||||||
|
// Define power of 2 range to check ptr, len alignment
|
||||||
|
# define PTR_ALIGN_CHK_B 32
|
||||||
|
# define LEN_ALIGN_CHK_B 32 // 0 for aligned only
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef unsigned char u8;
|
||||||
|
|
||||||
|
/*
 * Print the first len bytes of buf to stdout in hex.
 *
 * Each byte is written with the " %2x" format. A newline is emitted after
 * every 32nd byte, and one more newline is always written at the end (so a
 * length that is a multiple of 32 ends with two newlines).
 *
 * buf must address at least len readable bytes; nothing is printed except
 * the trailing newline when len <= 0.
 */
void dump(unsigned char *buf, int len)
{
	int i;
	for (i = 0; i < len;) {
		printf(" %2x", 0xff & buf[i++]);
		// i was already advanced, so this fires after bytes 32, 64, ...
		if (i % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print a k-by-m view of an array of byte buffers to stdout in hex.
 *
 * Row i shows the first m bytes of s[i], each written with " %2x", and is
 * terminated by a newline; a final blank line follows the last row.
 *
 * s must hold at least k pointers, each addressing at least m readable
 * bytes.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int i, j;
	for (i = 0; i < k; i++) {
		for (j = 0; j < m; j++) {
			printf(" %2x", s[i][j]);
		}
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print a flat byte array to stdout as a k-by-m hex matrix.
 *
 * Element (i, j) is read from s[j + i * m], i.e. the array is walked as k
 * consecutive rows of m bytes. Each byte is written with " %2x", every row
 * ends with a newline, and a final blank line follows the last row.
 *
 * s must address at least k * m readable bytes.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int i, j;
	for (i = 0; i < k; i++) {
		for (j = 0; j < m; j++) {
			printf(" %2x", 0xff & s[j + (i * m)]);
		}
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j, rtest, srcs;
|
||||||
|
void *buf;
|
||||||
|
u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES];
|
||||||
|
u8 g4[TEST_SOURCES], g5[TEST_SOURCES], g6[TEST_SOURCES], *g_tbls;
|
||||||
|
u8 *dest1, *dest2, *dest3, *dest4, *dest5, *dest6, *dest_ref1;
|
||||||
|
u8 *dest_ref2, *dest_ref3, *dest_ref4, *dest_ref5, *dest_ref6;
|
||||||
|
u8 *dest_ptrs[6], *buffs[TEST_SOURCES];
|
||||||
|
|
||||||
|
int align, size;
|
||||||
|
unsigned char *efence_buffs[TEST_SOURCES];
|
||||||
|
unsigned int offset;
|
||||||
|
u8 *ubuffs[TEST_SOURCES];
|
||||||
|
u8 *udest_ptrs[6];
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 16, 2 * (6 * TEST_SOURCES * 32))) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
g_tbls = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest2 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest3 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest4 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest5 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest6 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref1 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref2 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref3 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref4 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref5 = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref6 = buf;
|
||||||
|
|
||||||
|
dest_ptrs[0] = dest1;
|
||||||
|
dest_ptrs[1] = dest2;
|
||||||
|
dest_ptrs[2] = dest3;
|
||||||
|
dest_ptrs[3] = dest4;
|
||||||
|
dest_ptrs[4] = dest5;
|
||||||
|
dest_ptrs[5] = dest6;
|
||||||
|
|
||||||
|
// Test of all zeros
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
memset(buffs[i], 0, TEST_LEN);
|
||||||
|
|
||||||
|
memset(dest1, 0, TEST_LEN);
|
||||||
|
memset(dest2, 0, TEST_LEN);
|
||||||
|
memset(dest3, 0, TEST_LEN);
|
||||||
|
memset(dest4, 0, TEST_LEN);
|
||||||
|
memset(dest5, 0, TEST_LEN);
|
||||||
|
memset(dest6, 0, TEST_LEN);
|
||||||
|
memset(dest_ref1, 0, TEST_LEN);
|
||||||
|
memset(dest_ref2, 0, TEST_LEN);
|
||||||
|
memset(dest_ref3, 0, TEST_LEN);
|
||||||
|
memset(dest_ref4, 0, TEST_LEN);
|
||||||
|
memset(dest_ref5, 0, TEST_LEN);
|
||||||
|
memset(dest_ref6, 0, TEST_LEN);
|
||||||
|
memset(g1, 2, TEST_SOURCES);
|
||||||
|
memset(g2, 1, TEST_SOURCES);
|
||||||
|
memset(g3, 7, TEST_SOURCES);
|
||||||
|
memset(g4, 9, TEST_SOURCES);
|
||||||
|
memset(g5, 4, TEST_SOURCES);
|
||||||
|
memset(g6, 0xe6, TEST_SOURCES);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[32 * TEST_SOURCES + i * 32]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[64 * TEST_SOURCES + i * 32]);
|
||||||
|
gf_vect_mul_init(g4[i], &g_tbls[96 * TEST_SOURCES + i * 32]);
|
||||||
|
gf_vect_mul_init(g5[i], &g_tbls[128 * TEST_SOURCES + i * 32]);
|
||||||
|
gf_vect_mul_init(g6[i], &g_tbls[160 * TEST_SOURCES + i * 32]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref4);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref5);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], buffs,
|
||||||
|
dest_ref6);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test1\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test2\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test3\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test4\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest4, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test5\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref5, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest5, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test6\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref6, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest6, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
putchar('.');
|
||||||
|
|
||||||
|
// Rand data test
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
g4[i] = rand();
|
||||||
|
g5[i] = rand();
|
||||||
|
g6[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g6[i], &g_tbls[(160 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref4);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref5);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES],
|
||||||
|
buffs, dest_ref6);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest4, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref5, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest5, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test6 %d\n", rtest);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref6, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest6, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rand data test with varied parameters
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
g4[i] = rand();
|
||||||
|
g5[i] = rand();
|
||||||
|
g6[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[32 * srcs], buffs,
|
||||||
|
dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[64 * srcs], buffs,
|
||||||
|
dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[96 * srcs], buffs,
|
||||||
|
dest_ref4);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[128 * srcs], buffs,
|
||||||
|
dest_ref5);
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[160 * srcs], buffs,
|
||||||
|
dest_ref6);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test1 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test2 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test3 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test4 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest4, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test5 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref5, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest5, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST)
|
||||||
|
" test6 srcs=%d\n", srcs);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref6, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest6, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run tests at end of buffer for Electric Fence
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
|
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||||
|
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
g4[i] = rand();
|
||||||
|
g5[i] = rand();
|
||||||
|
g6[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g4[i], &g_tbls[(96 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g5[i], &g_tbls[(128 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g6[i], &g_tbls[(160 * TEST_SOURCES) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES],
|
||||||
|
efence_buffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES],
|
||||||
|
efence_buffs, dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES],
|
||||||
|
efence_buffs, dest_ref4);
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES],
|
||||||
|
efence_buffs, dest_ref5);
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES],
|
||||||
|
efence_buffs, dest_ref6);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref1, dest1, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test1 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest1, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref2, dest2, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test2 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest2, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref3, dest3, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test3 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest3, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref4, dest4, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test4 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest4, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref5, dest5, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test5 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref5, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest5, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref6, dest6, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test6 %d\n", rtest);
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref6, align);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest6, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test rand ptr alignment if available
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||||
|
srcs = rand() % TEST_SOURCES;
|
||||||
|
if (srcs == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||||
|
// Add random offsets
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
|
||||||
|
udest_ptrs[0] = dest1 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
udest_ptrs[1] = dest2 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
udest_ptrs[2] = dest3 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
udest_ptrs[3] = dest4 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
udest_ptrs[4] = dest5 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
udest_ptrs[5] = dest6 + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
|
||||||
|
memset(dest1, 0, TEST_LEN); // zero pad to check write-over
|
||||||
|
memset(dest2, 0, TEST_LEN);
|
||||||
|
memset(dest3, 0, TEST_LEN);
|
||||||
|
memset(dest4, 0, TEST_LEN);
|
||||||
|
memset(dest5, 0, TEST_LEN);
|
||||||
|
memset(dest6, 0, TEST_LEN);
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
ubuffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
g4[i] = rand();
|
||||||
|
g5[i] = rand();
|
||||||
|
g6[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], ubuffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], ubuffs, dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], ubuffs, dest_ref4);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], ubuffs, dest_ref5);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[160 * srcs], ubuffs, dest_ref6);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptrs);
|
||||||
|
|
||||||
|
if (memcmp(dest_ref1, udest_ptrs[0], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[0], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref2, udest_ptrs[1], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[1], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref3, udest_ptrs[2], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[2], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref4, udest_ptrs[3], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[3], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref5, udest_ptrs[4], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref5, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[4], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref6, udest_ptrs[5], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref6, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(udest_ptrs[5], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Confirm that padding around dests is unchanged
|
||||||
|
memset(dest_ref1, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||||
|
offset = udest_ptrs[0] - dest1;
|
||||||
|
|
||||||
|
if (memcmp(dest1, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad1 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest1 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad1 end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = udest_ptrs[1] - dest2;
|
||||||
|
if (memcmp(dest2, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad2 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest2 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad2 end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = udest_ptrs[2] - dest3;
|
||||||
|
if (memcmp(dest3, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad3 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest3 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad3 end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = udest_ptrs[3] - dest4;
|
||||||
|
if (memcmp(dest4, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad4 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest4 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad4 end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = udest_ptrs[4] - dest5;
|
||||||
|
if (memcmp(dest5, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad5 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest5 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad5 end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = udest_ptrs[5] - dest6;
|
||||||
|
if (memcmp(dest6, dest_ref1, offset)) {
|
||||||
|
printf("Fail rand ualign pad6 start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest6 + offset + size, dest_ref1, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad6 end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test all size alignment
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
|
|
||||||
|
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||||
|
srcs = TEST_SOURCES;
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
g1[i] = rand();
|
||||||
|
g2[i] = rand();
|
||||||
|
g3[i] = rand();
|
||||||
|
g4[i] = rand();
|
||||||
|
g5[i] = rand();
|
||||||
|
g6[i] = rand();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++) {
|
||||||
|
gf_vect_mul_init(g1[i], &g_tbls[i * 32]);
|
||||||
|
gf_vect_mul_init(g2[i], &g_tbls[(32 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g3[i], &g_tbls[(64 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g4[i], &g_tbls[(96 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g5[i], &g_tbls[(128 * srcs) + (i * 32)]);
|
||||||
|
gf_vect_mul_init(g6[i], &g_tbls[(160 * srcs) + (i * 32)]);
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref1);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[32 * srcs], buffs, dest_ref2);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[64 * srcs], buffs, dest_ref3);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[96 * srcs], buffs, dest_ref4);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[128 * srcs], buffs, dest_ref5);
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[160 * srcs], buffs, dest_ref6);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest_ptrs);
|
||||||
|
|
||||||
|
if (memcmp(dest_ref1, dest_ptrs[0], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref1, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[0], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref2, dest_ptrs[1], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref2, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[1], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref3, dest_ptrs[2], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref3, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[2], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref4, dest_ptrs[3], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref4, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[3], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref5, dest_ptrs[4], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref5, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[4], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest_ref6, dest_ptrs[5], size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref6, 25);
|
||||||
|
printf("dprod_dut:");
|
||||||
|
dump(dest_ptrs[5], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Pass\n");
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
}
|
323
erasure/src/gf-6vect-dot-prod-sse.asm
Normal file
323
erasure/src/gf-6vect-dot-prod-sse.asm
Normal file
@ -0,0 +1,323 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_6vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests);
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
;;; Register assignment and prologue/epilogue used when assembling for elf64.
;;; arg0..arg5 name the registers that carry the first six integer arguments;
;;; tmp..tmp6 are scratch registers. r12-r15 are pushed by FUNC_SAVE and popped
;;; in reverse order by FUNC_RESTORE.
%ifidn __OUTPUT_FORMAT__, elf64
 %define arg0  rdi
 %define arg1  rsi
 %define arg2  rdx
 %define arg3  rcx
 %define arg4  r8
 %define arg5  r9

 %define tmp   r11
 %define tmp2  r10
 %define tmp3  r13		; must be saved and restored
 %define tmp4  r12		; must be saved and restored
 %define tmp5  r14		; must be saved and restored
 %define tmp6  r15		; must be saved and restored
 %define return rax
 %define PS 8			; pointer size in bytes
 %define LOG_PS 3		; log2(PS), used to scale an index by shifting

 %define func(x) x:
 %macro FUNC_SAVE 0
	push	r12
	push	r13
	push	r14
	push	r15
 %endmacro
 %macro FUNC_RESTORE 0
	pop	r15
	pop	r14
	pop	r13
	pop	r12
 %endmacro
%endif
|
||||||
|
|
||||||
|
;;; Register assignment and prologue/epilogue used when assembling for win64.
;;; Only four arguments arrive in registers here; the fifth is loaded from the
;;; stack into r12 at the end of FUNC_SAVE. FUNC_SAVE reserves stack_size bytes
;;; and spills xmm6-xmm15 plus r12-r15, rdi and rsi into it; FUNC_RESTORE
;;; reloads them from the same slots and releases the frame.
%ifidn __OUTPUT_FORMAT__, win64
 %define arg0   rcx
 %define arg1   rdx
 %define arg2   r8
 %define arg3   r9

 %define arg4   r12		; must be saved, loaded and restored
 %define arg5   r15		; must be saved and restored
 %define tmp    r11
 %define tmp2   r10
 %define tmp3   r13		; must be saved and restored
 %define tmp4   r14		; must be saved and restored
 %define tmp5   rdi		; must be saved and restored
 %define tmp6   rsi		; must be saved and restored
 %define return rax
 %define PS     8		; pointer size in bytes
 %define LOG_PS 3		; log2(PS)
 %define stack_size  10*16 + 7*8	; must be an odd multiple of 8
 ;; Address of the caller-provided slot for argument x, as seen after
 ;; alloc_stack (the extra PS presumably skips the return address - confirm).
 %define arg(x)      [rsp + stack_size + PS + PS*x]

 %define func(x) proc_frame x
 %macro FUNC_SAVE 0
	alloc_stack	stack_size
	save_xmm128	xmm6, 0*16
	save_xmm128	xmm7, 1*16
	save_xmm128	xmm8, 2*16
	save_xmm128	xmm9, 3*16
	save_xmm128	xmm10, 4*16
	save_xmm128	xmm11, 5*16
	save_xmm128	xmm12, 6*16
	save_xmm128	xmm13, 7*16
	save_xmm128	xmm14, 8*16
	save_xmm128	xmm15, 9*16
	save_reg	r12, 10*16 + 0*8
	save_reg	r13, 10*16 + 1*8
	save_reg	r14, 10*16 + 2*8
	save_reg	r15, 10*16 + 3*8
	save_reg	rdi, 10*16 + 4*8
	save_reg	rsi, 10*16 + 5*8
	end_prolog
	mov	arg4, arg(4)		; fetch the fifth argument from the stack
 %endmacro

 %macro FUNC_RESTORE 0
	movdqa	xmm6, [rsp + 0*16]
	movdqa	xmm7, [rsp + 1*16]
	movdqa	xmm8, [rsp + 2*16]
	movdqa	xmm9, [rsp + 3*16]
	movdqa	xmm10, [rsp + 4*16]
	movdqa	xmm11, [rsp + 5*16]
	movdqa	xmm12, [rsp + 6*16]
	movdqa	xmm13, [rsp + 7*16]
	movdqa	xmm14, [rsp + 8*16]
	movdqa	xmm15, [rsp + 9*16]
	mov	r12, [rsp + 10*16 + 0*8]
	mov	r13, [rsp + 10*16 + 1*8]
	mov	r14, [rsp + 10*16 + 2*8]
	mov	r15, [rsp + 10*16 + 3*8]
	mov	rdi, [rsp + 10*16 + 4*8]
	mov	rsi, [rsp + 10*16 + 5*8]
	add	rsp, stack_size
 %endmacro
%endif
|
||||||
|
|
||||||
|
;;; Symbolic names for the function's arguments and working registers.
%define len   arg0		; byte length of each buffer
%define vec   arg1		; number of source buffers
%define mul_array arg2		; multiplication tables
%define src   arg3		; array of source buffer pointers
%define dest  arg4		; array of destination buffer pointers
%define ptr   arg5		; scratch: current source pointer / table offset
%define vec_i tmp2		; byte offset into the src pointer array
%define dest1 tmp3		; cached dest[0]
%define dest2 tmp4		; cached dest[1]
%define vskip1 tmp5		; vec * 32
%define vskip3 tmp6		; vec * 96
%define pos   return		; current byte offset within the buffers


;;; Select the instructions used to load source data (XLDR) and store results
;;; (XSTR).
%ifndef EC_ALIGNED_ADDR
;;; Use Un-aligned load/store
 %define XLDR movdqu
 %define XSTR movdqu
%else
;;; Use Non-temporal load/store
 %ifdef NO_NT_LDST
  %define XLDR movdqa
  %define XSTR movdqa
 %else
  %define XLDR movntdqa
  %define XSTR movntdq
 %endif
%endif
|
||||||
|
|
||||||
|
|
||||||
|
default rel

[bits 64]
section .text

;;; XMM register roles. Three table pairs are live at a time and are reused
;;; for outputs 1-3 and then 4-6; xp1..xp6 accumulate the six results.
%define xmask0f   xmm15
%define xgft1_lo  xmm14
%define xgft1_hi  xmm13
%define xgft2_lo  xmm12
%define xgft2_hi  xmm11
%define xgft3_lo  xmm10
%define xgft3_hi  xmm9
%define x0        xmm0
%define xtmpa     xmm1
%define xp1       xmm2
%define xp2       xmm3
%define xp3       xmm4
%define xp4       xmm5
%define xp5       xmm6
%define xp6       xmm7

;;; gf_6vect_dot_prod_sse(len, vec, *g_tbls, **buffs, **dests)
;;;
;;; For every 16-byte column of the inputs, looks up each source byte's low and
;;; high nibble in a pair of 16-byte tables (pshufb), XORs the two partial
;;; products, and XOR-accumulates over all `vec` sources into six outputs.
;;;
;;; Table layout as indexed below: 32 bytes (lo table, hi table) per source,
;;; and the tables for output k start at byte offset k * vec * 32.
;;;
;;; Returns 0 on success, 1 if len < 16. When len is not a multiple of 16 the
;;; final 16 bytes are processed by one extra pass that overlaps the previous
;;; one (pos is reset to len - 16).
align 16
global gf_6vect_dot_prod_sse:function
func(gf_6vect_dot_prod_sse)
	FUNC_SAVE
	sub	len, 16			;len now holds the last valid 16-byte offset
	jl	.return_fail
	xor	pos, pos
	movdqa	xmask0f, [mask0f]	;Load mask of lower nibble in each byte
	mov	vskip1, vec
	imul	vskip1, 32		;vskip1 = table bytes per output
	mov	vskip3, vec
	imul	vskip3, 96		;vskip3 = 3 * vskip1
	sal	vec, LOG_PS		;vec *= PS. Make vec_i count by PS
	mov	dest1, [dest]
	mov	dest2, [dest+PS]


.loop16:
	mov	tmp, mul_array		;Restart at the first source's tables
	xor	vec_i, vec_i
	pxor	xp1, xp1		;Clear the six accumulators
	pxor	xp2, xp2
	pxor	xp3, xp3
	pxor	xp4, xp4
	pxor	xp5, xp5
	pxor	xp6, xp6

.next_vect:
	mov	ptr, [src+vec_i]
	add	vec_i, PS
	XLDR	x0, [ptr+pos]		;Get next source vector

	movdqu	xgft1_lo, [tmp]			;Load array Ax{00}, Ax{01}, ..., Ax{0f}
	movdqu	xgft1_hi, [tmp+16]		;     "     Ax{00}, Ax{10}, ..., Ax{f0}
	movdqu	xgft2_lo, [tmp+vskip1*1]	;Load array Bx{00}, Bx{01}, ..., Bx{0f}
	movdqu	xgft2_hi, [tmp+vskip1*1+16]	;     "     Bx{00}, Bx{10}, ..., Bx{f0}
	movdqu	xgft3_lo, [tmp+vskip1*2]	;Load array Cx{00}, Cx{01}, ..., Cx{0f}
	movdqu	xgft3_hi, [tmp+vskip1*2+16]	;     "     Cx{00}, Cx{10}, ..., Cx{f0}
	lea	ptr, [vskip1 + vskip1*4]	;ptr = vskip5

	movdqa	xtmpa, x0		;Keep unshifted copy of src
	psraw	x0, 4			;Shift to put high nibble into bits 3-0
	pand	x0, xmask0f		;Mask high src nibble in bits 3-0
	pand	xtmpa, xmask0f		;Mask low src nibble in bits 3-0

	pshufb	xgft1_hi, x0		;Lookup mul table of high nibble
	pshufb	xgft1_lo, xtmpa		;Lookup mul table of low nibble
	pxor	xgft1_hi, xgft1_lo	;GF add high and low partials
	pxor	xp1, xgft1_hi		;xp1 += partial

	pshufb	xgft2_hi, x0		;Lookup mul table of high nibble
	pshufb	xgft2_lo, xtmpa		;Lookup mul table of low nibble
	pxor	xgft2_hi, xgft2_lo	;GF add high and low partials
	pxor	xp2, xgft2_hi		;xp2 += partial

	pshufb	xgft3_hi, x0		;Lookup mul table of high nibble
	pshufb	xgft3_lo, xtmpa		;Lookup mul table of low nibble
	pxor	xgft3_hi, xgft3_lo	;GF add high and low partials
	pxor	xp3, xgft3_hi		;xp3 += partial


	;; Reuse the same three table register pairs for outputs 4-6.
	movdqu	xgft1_lo, [tmp+vskip3]		;Load array Dx{00}, Dx{01}, ..., Dx{0f}
	movdqu	xgft1_hi, [tmp+vskip3+16]	;     "     Dx{00}, Dx{10}, ..., Dx{f0}
	movdqu	xgft2_lo, [tmp+vskip1*4]	;Load array Ex{00}, Ex{01}, ..., Ex{0f}
	movdqu	xgft2_hi, [tmp+vskip1*4+16]	;     "     Ex{00}, Ex{10}, ..., Ex{f0}
	movdqu	xgft3_lo, [tmp+ptr]		;Load array Fx{00}, Fx{01}, ..., Fx{0f}
	movdqu	xgft3_hi, [tmp+ptr+16]		;     "     Fx{00}, Fx{10}, ..., Fx{f0}
	add	tmp, 32				;Advance to the next source's tables


	pshufb	xgft1_hi, x0		;Lookup mul table of high nibble
	pshufb	xgft1_lo, xtmpa		;Lookup mul table of low nibble
	pxor	xgft1_hi, xgft1_lo	;GF add high and low partials
	pxor	xp4, xgft1_hi		;xp4 += partial

	pshufb	xgft2_hi, x0		;Lookup mul table of high nibble
	pshufb	xgft2_lo, xtmpa		;Lookup mul table of low nibble
	pxor	xgft2_hi, xgft2_lo	;GF add high and low partials
	pxor	xp5, xgft2_hi		;xp5 += partial

	pshufb	xgft3_hi, x0		;Lookup mul table of high nibble
	pshufb	xgft3_lo, xtmpa		;Lookup mul table of low nibble
	pxor	xgft3_hi, xgft3_lo	;GF add high and low partials
	pxor	xp6, xgft3_hi		;xp6 += partial

	cmp	vec_i, vec
	jl	.next_vect


	;; All sources consumed for this column. tmp, ptr and vec_i are free
	;; again, so they temporarily hold the dest[2..5] pointers.
	mov	tmp, [dest+2*PS]
	mov	ptr, [dest+3*PS]
	mov	vec_i, [dest+4*PS]

	XSTR	[dest1+pos], xp1
	XSTR	[dest2+pos], xp2
	XSTR	[tmp+pos], xp3
	mov	tmp, [dest+5*PS]
	XSTR	[ptr+pos], xp4
	XSTR	[vec_i+pos], xp5
	XSTR	[tmp+pos], xp6

	add	pos, 16			;Loop on 16 bytes at a time
	cmp	pos, len
	jle	.loop16

	lea	tmp, [len + 16]		;tmp = original length
	cmp	pos, tmp
	je	.return_pass		;Done when pos reached the original length

	;; Tail len
	mov	pos, len		;Overlapped offset length-16
	jmp	.loop16			;Do one more overlap pass

.return_pass:
	FUNC_RESTORE
	mov	return, 0
	ret

.return_fail:
	FUNC_RESTORE
	mov	return, 1
	ret

endproc_frame
|
||||||
|
|
||||||
|
section .data

align 16
;;; 16 bytes of 0x0f: keeps the low nibble of every byte when used with pand.
mask0f:	ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f

;;; slversion name, core, ver, snum
;;; Exports two global labels, <name>_slver and <name>_slver_<core><ver><snum>,
;;; both pointing at four data bytes: the word 0x<snum> followed by the bytes
;;; 0x<ver> and 0x<core>.
%macro slversion 4
global %1_slver_%2%3%4
global %1_slver
%1_slver:
%1_slver_%2%3%4:
	dw 0x%4
	db 0x%3, 0x%2
%endmacro
;;;       func                  core, ver, snum
slversion gf_6vect_dot_prod_sse, 00,   03,  0066
|
225
erasure/src/gf-inverse-test.c
Normal file
225
erasure/src/gf-inverse-test.c
Normal file
@ -0,0 +1,225 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
#include "erasure-code.h"
|
||||||
|
|
||||||
|
#define TEST_LEN 8192
|
||||||
|
|
||||||
|
#ifndef TEST_SOURCES
|
||||||
|
# define TEST_SOURCES 128
|
||||||
|
#endif
|
||||||
|
#ifndef RANDOMS
|
||||||
|
# define RANDOMS 200
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define KMAX TEST_SOURCES
|
||||||
|
|
||||||
|
typedef unsigned char u8;
|
||||||
|
|
||||||
|
/*
 * Multiply two n x n matrices stored row-major in flat byte arrays: c = a * b.
 *
 * Element products are computed with gf_mul() and summed with XOR.
 * c is written while a and b are still being read, so it must not alias
 * either input.
 */
void matrix_mult(u8 * a, u8 * b, u8 * c, int n)
{
	int i, j, k;
	u8 d;

	for (i = 0; i < n; i++) {
		for (j = 0; j < n; j++) {
			d = 0;	// running XOR-sum for element (i, j)
			for (k = 0; k < n; k++) {
				d ^= gf_mul(a[n * i + k], b[n * k + j]);
			}
			c[i * n + j] = d;
		}
	}
}
|
||||||
|
|
||||||
|
/*
 * Print an n x n row-major byte matrix to stdout, one row per line, each
 * element as a space-prefixed hex value, followed by one blank line.
 */
void print_matrix(u8 * a, int n)
{
	int i, j;

	for (i = 0; i < n; i++) {
		for (j = 0; j < n; j++) {
			printf(" %2x", a[i * n + j]);
		}
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Check whether the n x n row-major byte matrix `a` is the identity matrix.
 *
 * Returns 0 when every diagonal element is 1 and every other element is 0,
 * and -1 at the first element that differs.
 */
int is_ident(u8 * a, const int n)
{
	int i, j;

	for (i = 0; i < n; i++) {
		for (j = 0; j < n; j++) {
			// Identity has 1 on the diagonal and 0 elsewhere.
			const u8 expected = (i == j) ? 1 : 0;

			if (*a++ != expected)
				return -1;
		}
	}
	return 0;
}
|
||||||
|
|
||||||
|
/*
 * Invert the n x n matrix `in` and verify the result.
 *
 * `in` is first copied to `sav`, then passed to gf_invert_matrix() which
 * writes the inverse to `inv`. The product inv * sav is stored back into `in`
 * and must be the identity matrix.
 *
 * Returns 0 on success (and prints a '.'), or -1 after printing diagnostics
 * when the inversion reports failure or the product is not the identity.
 */
int inv_test(u8 * in, u8 * inv, u8 * sav, int n)
{
	memcpy(sav, in, n * n);	// keep the input; `in` is reused for the product below

	if (gf_invert_matrix(in, inv, n)) {
		printf("Given singular matrix\n");
		print_matrix(sav, n);
		return -1;
	}

	matrix_mult(inv, sav, in, n);

	if (is_ident(in, n)) {
		printf("fail\n");
		print_matrix(sav, n);
		print_matrix(inv, n);
		print_matrix(in, n);
		return -1;
	}
	putchar('.');

	return 0;
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, k, t;
|
||||||
|
u8 *test_mat, *save_mat, *invr_mat;
|
||||||
|
|
||||||
|
u8 test1[] = { 1, 1, 6,
|
||||||
|
1, 1, 1,
|
||||||
|
7, 1, 9
|
||||||
|
};
|
||||||
|
|
||||||
|
u8 test2[] = { 0, 1, 6,
|
||||||
|
1, 0, 1,
|
||||||
|
0, 1, 9
|
||||||
|
};
|
||||||
|
|
||||||
|
u8 test3[] = { 0, 0, 1,
|
||||||
|
1, 0, 0,
|
||||||
|
0, 1, 1
|
||||||
|
};
|
||||||
|
|
||||||
|
u8 test4[] = { 0, 1, 6, 7,
|
||||||
|
1, 1, 0, 0,
|
||||||
|
0, 1, 2, 3,
|
||||||
|
3, 2, 2, 3
|
||||||
|
}; // = row3+3*row2
|
||||||
|
|
||||||
|
printf("gf_inverse_test: max=%d ", KMAX);
|
||||||
|
|
||||||
|
test_mat = malloc(KMAX * KMAX);
|
||||||
|
save_mat = malloc(KMAX * KMAX);
|
||||||
|
invr_mat = malloc(KMAX * KMAX);
|
||||||
|
|
||||||
|
if (NULL == test_mat || NULL == save_mat || NULL == invr_mat)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// Test with lots of leading 1's
|
||||||
|
k = 3;
|
||||||
|
memcpy(test_mat, test1, k * k);
|
||||||
|
if (inv_test(test_mat, invr_mat, save_mat, k))
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// Test with leading zeros
|
||||||
|
k = 3;
|
||||||
|
memcpy(test_mat, test2, k * k);
|
||||||
|
if (inv_test(test_mat, invr_mat, save_mat, k))
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// Test 3
|
||||||
|
k = 3;
|
||||||
|
memcpy(test_mat, test3, k * k);
|
||||||
|
if (inv_test(test_mat, invr_mat, save_mat, k))
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// Test 4 - try a singular matrix
|
||||||
|
k = 4;
|
||||||
|
memcpy(test_mat, test4, k * k);
|
||||||
|
if (!gf_invert_matrix(test_mat, invr_mat, k)) {
|
||||||
|
printf("Fail: didn't catch singular matrix\n");
|
||||||
|
print_matrix(test4, 4);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Do random test of size KMAX
|
||||||
|
k = KMAX;
|
||||||
|
|
||||||
|
for (i = 0; i < k * k; i++)
|
||||||
|
test_mat[i] = save_mat[i] = rand();
|
||||||
|
|
||||||
|
if (gf_invert_matrix(test_mat, invr_mat, k)) {
|
||||||
|
printf("rand picked a singular matrix, try again\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
matrix_mult(invr_mat, save_mat, test_mat, k);
|
||||||
|
|
||||||
|
if (is_ident(test_mat, k)) {
|
||||||
|
printf("fail\n");
|
||||||
|
print_matrix(save_mat, k);
|
||||||
|
print_matrix(invr_mat, k);
|
||||||
|
print_matrix(test_mat, k);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Do Randoms. Random size and coefficients
|
||||||
|
for (t = 0; t < RANDOMS; t++) {
|
||||||
|
k = rand() % KMAX;
|
||||||
|
|
||||||
|
for (i = 0; i < k * k; i++)
|
||||||
|
test_mat[i] = save_mat[i] = rand();
|
||||||
|
|
||||||
|
if (gf_invert_matrix(test_mat, invr_mat, k))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
matrix_mult(invr_mat, save_mat, test_mat, k);
|
||||||
|
|
||||||
|
if (is_ident(test_mat, k)) {
|
||||||
|
printf("fail rand k=%d\n", k);
|
||||||
|
print_matrix(save_mat, k);
|
||||||
|
print_matrix(invr_mat, k);
|
||||||
|
print_matrix(test_mat, k);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (0 == (t % 8))
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
printf(" Pass\n");
|
||||||
|
return 0;
|
||||||
|
}
|
166
erasure/src/gf-vect-dot-prod-1tbl.c
Normal file
166
erasure/src/gf-vect-dot-prod-1tbl.c
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure/tests.h"
|
||||||
|
#include "erasure-code.h"
|
||||||
|
|
||||||
|
// Benchmark sizing. Uncomment CACHED_TEST (or define it on the command line)
// to loop many times over a small data set; otherwise a data set larger than
// the last level cache is used. Defining TEST_CUSTOM leaves TEST_SOURCES and
// TEST_LEN to be supplied by the builder.
// All value macros are fully parenthesized so they expand safely inside any
// surrounding expression.
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES 10
# define TEST_LEN (8*1024)
# define TEST_LOOPS 4000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test. Pull from large mem base.
# define TEST_SOURCES 10
# define GT_L3_CACHE (32*1024*1024)	/* some number > last level cache */
# define TEST_LEN (GT_L3_CACHE / TEST_SOURCES)
# define TEST_LOOPS 10
# define TEST_TYPE_STR "_cold"
# else
# define TEST_TYPE_STR "_cus"
# ifndef TEST_LOOPS
# define TEST_LOOPS 1000
# endif
# endif
#endif
|
||||||
|
|
||||||
|
typedef unsigned char u8;

// Global GF(256) tables
u8 gff[256];			// gff[i]: the element reached after i doublings of 1
u8 gflog[256];			// gflog[x]: last step index i at which gff[i] == x
u8 gf_mul_table[256 * 256];	// flat product table, indexed [a * 256 + b]

// Build gff[] and gflog[] by starting from 1 and doubling 256 times,
// folding in 0x1d whenever the top bit is shifted out.
// gflog[0] is set to 0 explicitly.
void mk_gf_field()
{
	int step = 0;
	u8 elem = 1;

	gflog[0] = 0;

	while (step < 256) {
		gff[step] = elem;
		gflog[elem] = step;

		// mult by GF{2}
		if (elem & 0x80)
			elem = (elem << 1) ^ 0x1d;
		else
			elem = elem << 1;

		step++;
	}
}
|
||||||
|
|
||||||
|
// Fill a flat 256x256 table so that table[a * 256 + b] holds gf_mul(a, b).
// This trades 64 KiB of memory for a single-lookup GF(2^8) multiply.
// table must have room for 256 * 256 bytes.
void mk_gf_mul_table(u8 * table)
{
	int idx;

	// High byte of the index is the first operand, low byte the second.
	for (idx = 0; idx < 256 * 256; idx++)
		table[idx] = gf_mul(idx >> 8, idx & 0xff);
}
|
||||||
|
|
||||||
|
// Reference dot product built on gf_mul().
// For every byte position p in [0, len):
//   dest[p] = XOR over n in [0, vlen) of gf_mul(src[n][p], v[n])
// src holds vlen source buffers of at least len bytes each; v holds vlen
// coefficients.
void gf_vect_dot_prod_ref(int len, int vlen, u8 * v, u8 ** src, u8 * dest)
{
	int pos;

	for (pos = 0; pos < len; pos++) {
		u8 acc = 0;
		int n = 0;

		while (n < vlen) {
			acc ^= gf_mul(src[n][pos], v[n]);
			n++;
		}

		dest[pos] = acc;
	}
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
int i, j, k;
|
||||||
|
u8 s, vec[TEST_SOURCES], dest1[TEST_LEN], dest2[TEST_LEN];
|
||||||
|
u8 *matrix[TEST_SOURCES];
|
||||||
|
struct perf start, stop;
|
||||||
|
|
||||||
|
mk_gf_field();
|
||||||
|
mk_gf_mul_table(gf_mul_table);
|
||||||
|
|
||||||
|
//generate random vector and matrix/data
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
vec[i] = rand();
|
||||||
|
|
||||||
|
if (!(matrix[i] = malloc(TEST_LEN))) {
|
||||||
|
fprintf(stderr, "Error failure\n\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
matrix[i][j] = rand();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_dot_prod_ref(TEST_LEN, TEST_SOURCES, vec, matrix, dest1);
|
||||||
|
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS; i++)
|
||||||
|
gf_vect_dot_prod_ref(TEST_LEN, TEST_SOURCES, vec, matrix, dest1);
|
||||||
|
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf("gf_vect_dot_prod_2tbl" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i);
|
||||||
|
|
||||||
|
// Warm up mult tables
|
||||||
|
for (i = 0; i < TEST_LEN; i++) {
|
||||||
|
s = 0;
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++) {
|
||||||
|
s ^= gf_mul_table[vec[j] * 256 + matrix[j][i]];
|
||||||
|
}
|
||||||
|
dest2[i] = s;
|
||||||
|
}
|
||||||
|
|
||||||
|
perf_start(&start);
|
||||||
|
for (k = 0; k < TEST_LOOPS; k++) {
|
||||||
|
for (i = 0; i < TEST_LEN; i++) {
|
||||||
|
s = 0;
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++) {
|
||||||
|
s ^= gf_mul_table[vec[j] * 256 + matrix[j][i]];
|
||||||
|
}
|
||||||
|
dest2[i] = s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf("gf_vect_dot_prod_1tbl" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * k);
|
||||||
|
|
||||||
|
// Compare with reference function
|
||||||
|
if (0 != memcmp(dest1, dest2, TEST_LEN)) {
|
||||||
|
printf("Error, different results!\n\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Pass functional test\n");
|
||||||
|
return 0;
|
||||||
|
}
|
184
erasure/src/gf-vect-dot-prod-avx-perf.c
Normal file
184
erasure/src/gf-vect-dot-prod-avx-perf.c
Normal file
@ -0,0 +1,184 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/tests.h"
|
||||||
|
|
||||||
|
// Routine being benchmarked; can be overridden on the compiler command line.
#ifndef FUNCTION_UNDER_TEST
# define FUNCTION_UNDER_TEST gf_vect_dot_prod_avx
#endif

// Two-level stringification so that macro arguments are expanded first.
#define str(s) #s
#define xstr(s) str(s)

// Benchmark sizing. All value macros are fully parenthesized so they expand
// safely inside any surrounding expression.
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_SOURCES 10
# define TEST_LEN (8*1024)
# define TEST_LOOPS 40000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test. Pull from large mem base.
# define TEST_SOURCES 10
# define GT_L3_CACHE (32*1024*1024)	/* some number > last level cache */
# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
# define TEST_LOOPS 100
# define TEST_TYPE_STR "_cold"
# else
# define TEST_TYPE_STR "_cus"
# ifndef TEST_LOOPS
# define TEST_LOOPS 1000
# endif
# endif
#endif

typedef unsigned char u8;
|
||||||
|
|
||||||
|
// Print len bytes of buf to stdout in hex, breaking the line after every
// 32 bytes and always finishing with a newline.
void dump(unsigned char *buf, int len)
{
	int pos = 0;

	while (pos < len) {
		printf(" %2x", buf[pos] & 0xff);
		pos++;
		if ((pos % 32) == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
// Print the first m bytes of each of the first k buffers in s, one buffer
// per output line, in hex; a blank line follows the last row.
void dump_matrix(unsigned char **s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		col = 0;
		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
void *buf;
|
||||||
|
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref;
|
||||||
|
u8 *temp_buff, *buffs[TEST_SOURCES];
|
||||||
|
struct perf start, stop;
|
||||||
|
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
temp_buff = buf;
|
||||||
|
|
||||||
|
// Performance test
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
memset(dest, 0, TEST_LEN);
|
||||||
|
memset(temp_buff, 0, TEST_LEN);
|
||||||
|
memset(dest_ref, 0, TEST_LEN);
|
||||||
|
memset(g, 0, TEST_SOURCES);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
g[i] = rand();
|
||||||
|
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++)
|
||||||
|
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||||
|
|
||||||
|
#ifdef DO_REF_PERF
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS; i++) {
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++)
|
||||||
|
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||||
|
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS; i++) {
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++)
|
||||||
|
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, 25);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(dest, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("pass perf check\n");
|
||||||
|
return 0;
|
||||||
|
}
|
525
erasure/src/gf-vect-dot-prod-avx-test.c
Normal file
525
erasure/src/gf-vect-dot-prod-avx-test.c
Normal file
@ -0,0 +1,525 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/types.h"
|
||||||
|
|
||||||
|
// Build-time knobs; each one may be predefined on the compiler command line.
#if !defined(FUNCTION_UNDER_TEST)
# define FUNCTION_UNDER_TEST gf_vect_dot_prod_avx
#endif
#if !defined(TEST_MIN_SIZE)
# define TEST_MIN_SIZE 16
#endif

// Two-level stringification so that macro arguments are expanded first.
#define str(s) #s
#define xstr(s) str(s)

#define TEST_LEN 8192
#define TEST_SIZE (TEST_LEN/2)

#if !defined(TEST_SOURCES)
# define TEST_SOURCES 16
#endif
#if !defined(RANDOMS)
# define RANDOMS 20
#endif

#define MMAX TEST_SOURCES
#define KMAX TEST_SOURCES

// Power of 2 range used to check ptr, len alignment (0 for aligned only)
#if !defined(EC_ALIGNED_ADDR)
# define PTR_ALIGN_CHK_B 32
# define LEN_ALIGN_CHK_B 32
#else
# define PTR_ALIGN_CHK_B 0
# define LEN_ALIGN_CHK_B 0
#endif

typedef unsigned char u8;
|
||||||
|
|
||||||
|
// Print len bytes of buf to stdout in hex, breaking the line after every
// 32 bytes and always finishing with a newline.
void dump(unsigned char *buf, int len)
{
	int pos = 0;

	while (pos < len) {
		printf(" %2x", buf[pos] & 0xff);
		pos++;
		if ((pos % 32) == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
// Print the first m bytes of each of the first k buffers in s, one buffer
// per output line, in hex; a blank line follows the last row.
void dump_matrix(unsigned char **s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		col = 0;
		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
// Print a k-row by m-column byte matrix stored contiguously row after row
// in s, one row per output line, in hex; a blank line follows the last row.
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		col = 0;
		while (col < m) {
			printf(" %2x", s[row * m + col] & 0xff);
			col++;
		}
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j, rtest, srcs, m, k, nerrs, r, err;
|
||||||
|
void *buf;
|
||||||
|
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||||
|
u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
|
||||||
|
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
|
||||||
|
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||||
|
|
||||||
|
int align, size;
|
||||||
|
unsigned char *efence_buffs[TEST_SOURCES];
|
||||||
|
unsigned int offset;
|
||||||
|
u8 *ubuffs[TEST_SOURCES];
|
||||||
|
u8 *udest_ptr;
|
||||||
|
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
temp_buff = buf;
|
||||||
|
|
||||||
|
// Test of all zeros
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
memset(buffs[i], 0, TEST_LEN);
|
||||||
|
|
||||||
|
memset(dest, 0, TEST_LEN);
|
||||||
|
memset(temp_buff, 0, TEST_LEN);
|
||||||
|
memset(dest_ref, 0, TEST_LEN);
|
||||||
|
memset(g, 0, TEST_SOURCES);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " \n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, 25);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(dest, 25);
|
||||||
|
return -1;
|
||||||
|
} else
|
||||||
|
putchar('.');
|
||||||
|
|
||||||
|
// Rand data test
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
g[i] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 1\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, 25);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(dest, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rand data test with varied parameters
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
g[i] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref);
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n");
|
||||||
|
dump_matrix(buffs, 5, srcs);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, 5);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(dest, 5);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test erasure code using gf_vect_dot_prod
|
||||||
|
|
||||||
|
// Pick a first test
|
||||||
|
m = 9;
|
||||||
|
k = 5;
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
gf_gen_rs_matrix(a, m, k);
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
for (i = k; i < m; i++) {
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||||
|
#ifndef USEREF
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||||
|
#else
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Random buffers in erasure
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||||
|
err = 1 & rand();
|
||||||
|
src_in_err[i] = err;
|
||||||
|
if (err)
|
||||||
|
src_err_list[nerrs++] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
// construct b by removing error rows
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r]) {
|
||||||
|
r++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
b[k * i + j] = a[k * r + j];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||||
|
printf("BAD MATRIX\n");
|
||||||
|
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r]) {
|
||||||
|
r++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
recov[i] = buffs[r];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||||
|
#ifndef USEREF
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||||
|
#else
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buff, 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do more random tests
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
while ((m = (rand() % MMAX)) < 2) ;
|
||||||
|
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||||
|
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
gf_gen_rs_matrix(a, m, k);
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
for (i = k; i < m; i++) {
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||||
|
#ifndef USEREF
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||||
|
#else
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Random errors
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||||
|
err = 1 & rand();
|
||||||
|
src_in_err[i] = err;
|
||||||
|
if (err)
|
||||||
|
src_err_list[nerrs++] = i;
|
||||||
|
}
|
||||||
|
if (nerrs == 0) { // should have at least one error
|
||||||
|
while ((err = (rand() % KMAX)) >= k) ;
|
||||||
|
src_err_list[nerrs++] = err;
|
||||||
|
src_in_err[err] = 1;
|
||||||
|
}
|
||||||
|
// construct b by removing error rows
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r]) {
|
||||||
|
r++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
b[k * i + j] = a[k * r + j];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||||
|
printf("BAD MATRIX\n");
|
||||||
|
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r]) {
|
||||||
|
r++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
recov[i] = buffs[r];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||||
|
#ifndef USEREF
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||||
|
#else
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
||||||
|
#endif
|
||||||
|
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (i = 0; i < nerrs; i++)
|
||||||
|
printf(" %d", src_err_list[i]);
|
||||||
|
printf("\na:\n");
|
||||||
|
dump_u8xu8((u8 *) a, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((u8 *) d, k, k);
|
||||||
|
printf("orig data:\n");
|
||||||
|
dump_matrix(buffs, m, 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buff, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run tests at end of buffer for Electric Fence
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
|
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||||
|
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
g[i] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref);
|
||||||
|
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref, dest, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n");
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, align);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(dest, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test rand ptr alignment if available
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||||
|
srcs = rand() % TEST_SOURCES;
|
||||||
|
if (srcs == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||||
|
// Add random offsets
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
|
||||||
|
udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
|
||||||
|
memset(dest, 0, TEST_LEN); // zero pad to check write-over
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
ubuffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
g[i] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr);
|
||||||
|
|
||||||
|
if (memcmp(dest_ref, udest_ptr, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, 25);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(udest_ptr, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Confirm that padding around dests is unchanged
|
||||||
|
memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||||
|
offset = udest_ptr - dest;
|
||||||
|
|
||||||
|
if (memcmp(dest, dest_ref, offset)) {
|
||||||
|
printf("Fail rand ualign pad start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test all size alignment
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
|
|
||||||
|
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||||
|
srcs = TEST_SOURCES;
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
g[i] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest);
|
||||||
|
|
||||||
|
if (memcmp(dest_ref, dest, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, 25);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(dest, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("done all: Pass\n");
|
||||||
|
return 0;
|
||||||
|
}
|
198
erasure/src/gf-vect-dot-prod-avx.asm
Normal file
198
erasure/src/gf-vect-dot-prod-avx.asm
Normal file
@ -0,0 +1,198 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_vect_dot_prod_avx(len, vec, *g_tbls, **buffs, *dest);
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r9
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define func(x) x:
|
||||||
|
%define FUNC_SAVE
|
||||||
|
%define FUNC_RESTORE
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
%define arg0 rcx
|
||||||
|
%define arg1 rdx
|
||||||
|
%define arg2 r8
|
||||||
|
%define arg3 r9
|
||||||
|
|
||||||
|
%define arg4 r12 ; must be saved and loaded
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 rdi ; must be saved and loaded
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define frame_size 2*8
|
||||||
|
%define arg(x) [rsp + frame_size + PS + PS*x]
|
||||||
|
|
||||||
|
%define func(x) proc_frame x
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
rex_push_reg r12
|
||||||
|
push_reg rdi
|
||||||
|
end_prolog
|
||||||
|
mov arg4, arg(4)
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop rdi
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
%define len arg0
|
||||||
|
%define vec arg1
|
||||||
|
%define mul_array arg2
|
||||||
|
%define src arg3
|
||||||
|
%define dest arg4
|
||||||
|
|
||||||
|
%define vec_i tmp2
|
||||||
|
%define ptr tmp3
|
||||||
|
%define pos return
|
||||||
|
|
||||||
|
%ifndef EC_ALIGNED_ADDR
|
||||||
|
;;; Use Un-aligned load/store
|
||||||
|
%define XLDR vmovdqu
|
||||||
|
%define XSTR vmovdqu
|
||||||
|
%else
|
||||||
|
;;; Use Non-temporal load/store
|
||||||
|
%ifdef NO_NT_LDST
|
||||||
|
%define XLDR vmovdqa
|
||||||
|
%define XSTR vmovdqa
|
||||||
|
%else
|
||||||
|
%define XLDR vmovntdqa
|
||||||
|
%define XSTR vmovntdq
|
||||||
|
%endif
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
default rel
|
||||||
|
|
||||||
|
[bits 64]
|
||||||
|
section .text
|
||||||
|
|
||||||
|
%define xmask0f xmm5
|
||||||
|
%define xgft_lo xmm4
|
||||||
|
%define xgft_hi xmm3
|
||||||
|
|
||||||
|
%define x0 xmm0
|
||||||
|
%define xtmpa xmm1
|
||||||
|
%define xp xmm2
|
||||||
|
|
||||||
|
align 16
|
||||||
|
global gf_vect_dot_prod_avx:function
|
||||||
|
;;; gf_vect_dot_prod_avx(len, vec, *g_tbls, **buffs, *dest)
;;; For every 16-byte column, each of the `vec` source buffers is split into
;;; low/high nibbles, each nibble is looked up in that source's 32-byte table
;;; from mul_array (16 bytes low-nibble products, 16 bytes high-nibble
;;; products), and all partial products are XORed together into dest.
;;; Returns 0 in rax on success, 1 if len < 16 (dest is not written).
;;; A len that is not a multiple of 16 is completed by one extra pass over
;;; the last 16 bytes, which recomputes some bytes already stored.
;;; NOTE(review): the source loop is bottom-tested, so one source is read
;;; even when vec <= 0 -- confirm callers never pass that.
func(gf_vect_dot_prod_avx)
|
||||||
|
FUNC_SAVE
|
||||||
|
sub len, 16 ;len becomes the offset of the last full 16-byte block
|
||||||
|
jl .return_fail ;fewer than 16 bytes: nothing can be processed
|
||||||
|
xor pos, pos
|
||||||
|
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||||
|
|
||||||
|
.loop16:
|
||||||
|
vpxor xp, xp ;Clear the accumulator for this 16-byte column
|
||||||
|
mov tmp, mul_array ;Restart at the first source's table
|
||||||
|
xor vec_i, vec_i
|
||||||
|
|
||||||
|
.next_vect:
|
||||||
|
mov ptr, [src+vec_i*PS]
|
||||||
|
vmovdqu xgft_lo, [tmp] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||||
|
vmovdqu xgft_hi, [tmp+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||||
|
XLDR x0, [ptr+pos] ;Get next source vector
|
||||||
|
add tmp, 32 ;Advance to the next source's 32-byte table
|
||||||
|
add vec_i, 1
|
||||||
|
|
||||||
|
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0
|
||||||
|
;; The shift below is word-wide, so it also drags other bits into the
|
||||||
|
;; upper nibble of each byte; the following vpand clears them.
|
||||||
|
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0
|
||||||
|
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||||
|
|
||||||
|
vpshufb xgft_hi, xgft_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft_lo, xgft_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft_hi, xgft_hi, xgft_lo ;GF add high and low partials
|
||||||
|
vpxor xp, xp, xgft_hi ;xp += partial
|
||||||
|
cmp vec_i, vec
|
||||||
|
jl .next_vect
|
||||||
|
|
||||||
|
XSTR [dest+pos], xp
|
||||||
|
add pos, 16 ;Loop on 16 bytes at a time
|
||||||
|
cmp pos, len
|
||||||
|
jle .loop16
|
||||||
|
|
||||||
|
;; pos == original length (len + 16) means the buffer ended on a
|
||||||
|
;; 16-byte boundary and there is no tail left to process.
|
||||||
|
lea tmp, [len + 16]
|
||||||
|
cmp pos, tmp
|
||||||
|
je .return_pass
|
||||||
|
|
||||||
|
;; Tail len
|
||||||
|
mov pos, len ;Overlapped offset length-16
|
||||||
|
jmp .loop16 ;Do one more overlap pass
|
||||||
|
|
||||||
|
.return_pass:
|
||||||
|
mov return, 0
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
.return_fail:
|
||||||
|
mov return, 1
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
endproc_frame
|
||||||
|
|
||||||
|
section .data
|
||||||
|
|
||||||
|
align 16
|
||||||
|
|
||||||
|
poly:
|
||||||
|
mask0f:
|
||||||
|
ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||||
|
|
||||||
|
%macro slversion 4
|
||||||
|
global %1_slver_%2%3%4
|
||||||
|
global %1_slver
|
||||||
|
%1_slver:
|
||||||
|
%1_slver_%2%3%4:
|
||||||
|
dw 0x%4
|
||||||
|
db 0x%3, 0x%2
|
||||||
|
%endmacro
|
||||||
|
;;; func core, ver, snum
|
||||||
|
slversion gf_vect_dot_prod_avx, 02, 03, 0061
|
203
erasure/src/gf-vect-dot-prod-avx2.asm
Normal file
203
erasure/src/gf-vect-dot-prod-avx2.asm
Normal file
@ -0,0 +1,203 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, *dest);
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp.w r11d
|
||||||
|
%define tmp.b r11b
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r9
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define func(x) x:
|
||||||
|
%define FUNC_SAVE
|
||||||
|
%define FUNC_RESTORE
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
%define arg0 rcx
|
||||||
|
%define arg1 rdx
|
||||||
|
%define arg2 r8
|
||||||
|
%define arg3 r9
|
||||||
|
|
||||||
|
%define arg4 r12 ; must be saved and loaded
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp.w r11d
|
||||||
|
%define tmp.b r11b
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 rdi ; must be saved and loaded
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define frame_size 2*8
|
||||||
|
%define arg(x) [rsp + frame_size + PS + PS*x]
|
||||||
|
|
||||||
|
%define func(x) proc_frame x
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
rex_push_reg r12
|
||||||
|
push_reg rdi
|
||||||
|
end_prolog
|
||||||
|
mov arg4, arg(4)
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop rdi
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
%define len arg0
|
||||||
|
%define vec arg1
|
||||||
|
%define mul_array arg2
|
||||||
|
%define src arg3
|
||||||
|
%define dest arg4
|
||||||
|
|
||||||
|
%define vec_i tmp2
|
||||||
|
%define ptr tmp3
|
||||||
|
%define pos return
|
||||||
|
|
||||||
|
%ifndef EC_ALIGNED_ADDR
|
||||||
|
;;; Use Un-aligned load/store
|
||||||
|
%define XLDR vmovdqu
|
||||||
|
%define XSTR vmovdqu
|
||||||
|
%else
|
||||||
|
;;; Use Non-temporal load/store
|
||||||
|
%ifdef NO_NT_LDST
|
||||||
|
%define XLDR vmovdqa
|
||||||
|
%define XSTR vmovdqa
|
||||||
|
%else
|
||||||
|
%define XLDR vmovntdqa
|
||||||
|
%define XSTR vmovntdq
|
||||||
|
%endif
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
default rel
|
||||||
|
|
||||||
|
[bits 64]
|
||||||
|
section .text
|
||||||
|
|
||||||
|
%define xmask0f ymm3
|
||||||
|
%define xmask0fx xmm3
|
||||||
|
%define xgft_lo ymm4
|
||||||
|
%define xgft_hi ymm5
|
||||||
|
|
||||||
|
%define x0 ymm0
|
||||||
|
%define xtmpa ymm1
|
||||||
|
%define xp ymm2
|
||||||
|
|
||||||
|
align 16
|
||||||
|
global gf_vect_dot_prod_avx2:function
|
||||||
|
;;; gf_vect_dot_prod_avx2(len, vec, *g_tbls, **buffs, *dest)
;;; Same computation as the 16-byte variants but on 32-byte columns: each
;;; source's 32-byte table from mul_array is loaded once and its low and high
;;; 16-byte halves are duplicated across both 128-bit lanes before the nibble
;;; lookups; the partial products of all `vec` sources are XORed into dest.
;;; Returns 0 in rax on success, 1 if len < 32 (dest is not written).
;;; A len that is not a multiple of 32 is completed by one extra pass over
;;; the last 32 bytes, which recomputes some bytes already stored.
;;; NOTE(review): the source loop is bottom-tested, so one source is read
;;; even when vec <= 0 -- confirm callers never pass that.
func(gf_vect_dot_prod_avx2)
|
||||||
|
FUNC_SAVE
|
||||||
|
sub len, 32 ;len becomes the offset of the last full 32-byte block
|
||||||
|
jl .return_fail ;fewer than 32 bytes: nothing can be processed
|
||||||
|
xor pos, pos
|
||||||
|
mov tmp.b, 0x0f
|
||||||
|
vpinsrb xmask0fx, xmask0fx, tmp.w, 0
|
||||||
|
vpbroadcastb xmask0f, xmask0fx ;Construct mask 0x0f0f0f...
|
||||||
|
|
||||||
|
.loop32:
|
||||||
|
vpxor xp, xp ;Clear the accumulator for this 32-byte column
|
||||||
|
mov tmp, mul_array ;Restart at the first source's table
|
||||||
|
xor vec_i, vec_i
|
||||||
|
|
||||||
|
.next_vect:
|
||||||
|
mov ptr, [src+vec_i*PS]
|
||||||
|
|
||||||
|
vmovdqu xgft_lo, [tmp] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||||
|
; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||||
|
vperm2i128 xgft_hi, xgft_lo, xgft_lo, 0x11 ; swapped to hi | hi
|
||||||
|
vperm2i128 xgft_lo, xgft_lo, xgft_lo, 0x00 ; swapped to lo | lo
|
||||||
|
|
||||||
|
XLDR x0, [ptr+pos] ;Get next source vector
|
||||||
|
add tmp, 32 ;Advance to the next source's 32-byte table
|
||||||
|
add vec_i, 1
|
||||||
|
|
||||||
|
vpand xtmpa, x0, xmask0f ;Mask low src nibble in bits 3-0
|
||||||
|
;; The shift below is word-wide, so it also drags other bits into the
|
||||||
|
;; upper nibble of each byte; the following vpand clears them.
|
||||||
|
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0
|
||||||
|
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||||
|
|
||||||
|
vpshufb xgft_hi, xgft_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xgft_lo, xgft_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
vpxor xgft_hi, xgft_hi, xgft_lo ;GF add high and low partials
|
||||||
|
vpxor xp, xp, xgft_hi ;xp += partial
|
||||||
|
cmp vec_i, vec
|
||||||
|
jl .next_vect
|
||||||
|
|
||||||
|
XSTR [dest+pos], xp
|
||||||
|
add pos, 32 ;Loop on 32 bytes at a time
|
||||||
|
cmp pos, len
|
||||||
|
jle .loop32
|
||||||
|
|
||||||
|
;; pos == original length (len + 32) means the buffer ended on a
|
||||||
|
;; 32-byte boundary and there is no tail left to process.
|
||||||
|
lea tmp, [len + 32]
|
||||||
|
cmp pos, tmp
|
||||||
|
je .return_pass
|
||||||
|
|
||||||
|
;; Tail len
|
||||||
|
mov pos, len ;Overlapped offset length-32
|
||||||
|
jmp .loop32 ;Do one more overlap pass
|
||||||
|
|
||||||
|
.return_pass:
|
||||||
|
mov return, 0
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
.return_fail:
|
||||||
|
mov return, 1
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
endproc_frame
|
||||||
|
|
||||||
|
section .data
|
||||||
|
|
||||||
|
%macro slversion 4
|
||||||
|
global %1_slver_%2%3%4
|
||||||
|
global %1_slver
|
||||||
|
%1_slver:
|
||||||
|
%1_slver_%2%3%4:
|
||||||
|
dw 0x%4
|
||||||
|
db 0x%3, 0x%2
|
||||||
|
%endmacro
|
||||||
|
;;; func core, ver, snum
|
||||||
|
slversion gf_vect_dot_prod_avx2, 04, 03, 0190
|
290
erasure/src/gf-vect-dot-prod-base-test.c
Normal file
290
erasure/src/gf-vect-dot-prod-base-test.c
Normal file
@ -0,0 +1,290 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/types.h"
|
||||||
|
|
||||||
|
#define TEST_LEN 8192
|
||||||
|
#define TEST_SIZE (TEST_LEN/2)
|
||||||
|
|
||||||
|
#ifndef TEST_SOURCES
|
||||||
|
# define TEST_SOURCES 250
|
||||||
|
#endif
|
||||||
|
#ifndef RANDOMS
|
||||||
|
# define RANDOMS 20
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define MMAX TEST_SOURCES
|
||||||
|
#define KMAX TEST_SOURCES
|
||||||
|
|
||||||
|
typedef unsigned char u8;
|
||||||
|
|
||||||
|
/*
 * Print the first len bytes of buf as space-separated hex values, breaking
 * the line after every 32 bytes, and finish with a newline.
 */
void dump(unsigned char *buf, int len)
{
	int done = 0;

	while (done < len) {
		printf(" %2x", buf[done] & 0xff);
		done++;
		// Wrap after each full group of 32 values
		if ((done % 32) == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print the first m bytes of each of the first k buffers in s as hex,
 * one buffer per output line, followed by a blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print a k-row by m-column byte matrix stored row-major in s as hex,
 * one matrix row per output line, followed by a blank line.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		// Offset of the first element of this row in the flat array
		int base = row * m;

		for (col = 0; col < m; col++)
			printf(" %2x", s[base + col] & 0xff);
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j, rtest, m, k, nerrs, r, err;
|
||||||
|
void *buf;
|
||||||
|
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||||
|
u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
|
||||||
|
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
|
||||||
|
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||||
|
|
||||||
|
printf("gf_vect_dot_prod_base: %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
temp_buff = buf;
|
||||||
|
|
||||||
|
// Init
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
memset(buffs[i], 0, TEST_LEN);
|
||||||
|
|
||||||
|
memset(dest, 0, TEST_LEN);
|
||||||
|
memset(temp_buff, 0, TEST_LEN);
|
||||||
|
memset(dest_ref, 0, TEST_LEN);
|
||||||
|
memset(g, 0, TEST_SOURCES);
|
||||||
|
|
||||||
|
// Test erasure code using gf_vect_dot_prod
|
||||||
|
// Pick a first test
|
||||||
|
m = 9;
|
||||||
|
k = 5;
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
gf_gen_cauchy1_matrix(a, m, k);
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
for (i = k; i < m; i++) {
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Random buffers in erasure
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||||
|
err = 1 & rand();
|
||||||
|
src_in_err[i] = err;
|
||||||
|
if (err)
|
||||||
|
src_err_list[nerrs++] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
// construct b by removing error rows
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r]) {
|
||||||
|
r++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
b[k * i + j] = a[k * r + j];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||||
|
printf("BAD MATRIX\n");
|
||||||
|
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r]) {
|
||||||
|
r++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
recov[i] = buffs[r];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buff, 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do more random tests
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
while ((m = (rand() % MMAX)) < 2) ;
|
||||||
|
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||||
|
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
gf_gen_cauchy1_matrix(a, m, k);
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
for (i = k; i < m; i++) {
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Random errors
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||||
|
err = 1 & rand();
|
||||||
|
src_in_err[i] = err;
|
||||||
|
if (err)
|
||||||
|
src_err_list[nerrs++] = i;
|
||||||
|
}
|
||||||
|
if (nerrs == 0) { // should have at least one error
|
||||||
|
while ((err = (rand() % KMAX)) >= k) ;
|
||||||
|
src_err_list[nerrs++] = err;
|
||||||
|
src_in_err[err] = 1;
|
||||||
|
}
|
||||||
|
// construct b by removing error rows
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r]) {
|
||||||
|
r++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
b[k * i + j] = a[k * r + j];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||||
|
printf("BAD MATRIX\n");
|
||||||
|
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r]) {
|
||||||
|
r++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
recov[i] = buffs[r];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (i = 0; i < nerrs; i++)
|
||||||
|
printf(" %d", src_err_list[i]);
|
||||||
|
printf("\na:\n");
|
||||||
|
dump_u8xu8((u8 *) a, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((u8 *) d, k, k);
|
||||||
|
printf("orig data:\n");
|
||||||
|
dump_matrix(buffs, m, 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buff, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("done all: Pass\n");
|
||||||
|
return 0;
|
||||||
|
}
|
184
erasure/src/gf-vect-dot-prod-perf.c
Normal file
184
erasure/src/gf-vect-dot-prod-perf.c
Normal file
@ -0,0 +1,184 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/tests.h"
|
||||||
|
|
||||||
|
#ifndef FUNCTION_UNDER_TEST
|
||||||
|
# define FUNCTION_UNDER_TEST gf_vect_dot_prod
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define str(s) #s
|
||||||
|
#define xstr(s) str(s)
|
||||||
|
|
||||||
|
//#define CACHED_TEST
|
||||||
|
#ifdef CACHED_TEST
|
||||||
|
// Cached test, loop many times over small dataset
|
||||||
|
# define TEST_SOURCES 10
|
||||||
|
# define TEST_LEN 8*1024
|
||||||
|
# define TEST_LOOPS 40000
|
||||||
|
# define TEST_TYPE_STR "_warm"
|
||||||
|
#else
|
||||||
|
# ifndef TEST_CUSTOM
|
||||||
|
// Uncached test. Pull from large mem base.
|
||||||
|
# define TEST_SOURCES 10
|
||||||
|
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
|
||||||
|
# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
|
||||||
|
# define TEST_LOOPS 100
|
||||||
|
# define TEST_TYPE_STR "_cold"
|
||||||
|
# else
|
||||||
|
# define TEST_TYPE_STR "_cus"
|
||||||
|
# ifndef TEST_LOOPS
|
||||||
|
# define TEST_LOOPS 1000
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef unsigned char u8;
|
||||||
|
|
||||||
|
/*
 * Hex-dump the first len bytes of buf: values are space separated, a line
 * break follows every 32nd value, and a final newline ends the dump.
 */
void dump(unsigned char *buf, int len)
{
	int idx;

	for (idx = 0; idx < len; idx++) {
		printf(" %2x", 0xff & buf[idx]);
		// idx + 1 values have been printed so far
		if ((idx + 1) % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Hex-dump the leading m bytes of the first k buffers referenced by s,
 * one buffer per line, then emit a trailing blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int r, c;

	for (r = 0; r < k; r++) {
		for (c = 0; c < m; c++)
			printf(" %2x", s[r][c]);
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
void *buf;
|
||||||
|
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref;
|
||||||
|
u8 *temp_buff, *buffs[TEST_SOURCES];
|
||||||
|
struct perf start, stop;
|
||||||
|
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
temp_buff = buf;
|
||||||
|
|
||||||
|
// Performance test
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
memset(dest, 0, TEST_LEN);
|
||||||
|
memset(temp_buff, 0, TEST_LEN);
|
||||||
|
memset(dest_ref, 0, TEST_LEN);
|
||||||
|
memset(g, 0, TEST_SOURCES);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
g[i] = rand();
|
||||||
|
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++)
|
||||||
|
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||||
|
|
||||||
|
#ifdef DO_REF_PERF
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS; i++) {
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++)
|
||||||
|
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||||
|
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS; i++) {
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++)
|
||||||
|
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, 25);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(dest, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("pass perf check\n");
|
||||||
|
return 0;
|
||||||
|
}
|
184
erasure/src/gf-vect-dot-prod-sse-perf.c
Normal file
184
erasure/src/gf-vect-dot-prod-sse-perf.c
Normal file
@ -0,0 +1,184 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/tests.h"
|
||||||
|
|
||||||
|
#ifndef FUNCTION_UNDER_TEST
|
||||||
|
# define FUNCTION_UNDER_TEST gf_vect_dot_prod_sse
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define str(s) #s
|
||||||
|
#define xstr(s) str(s)
|
||||||
|
|
||||||
|
//#define CACHED_TEST
|
||||||
|
#ifdef CACHED_TEST
|
||||||
|
// Cached test, loop many times over small dataset
|
||||||
|
# define TEST_SOURCES 10
|
||||||
|
# define TEST_LEN 8*1024
|
||||||
|
# define TEST_LOOPS 40000
|
||||||
|
# define TEST_TYPE_STR "_warm"
|
||||||
|
#else
|
||||||
|
# ifndef TEST_CUSTOM
|
||||||
|
// Uncached test. Pull from large mem base.
|
||||||
|
# define TEST_SOURCES 10
|
||||||
|
# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */
|
||||||
|
# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1))
|
||||||
|
# define TEST_LOOPS 100
|
||||||
|
# define TEST_TYPE_STR "_cold"
|
||||||
|
# else
|
||||||
|
# define TEST_TYPE_STR "_cus"
|
||||||
|
# ifndef TEST_LOOPS
|
||||||
|
# define TEST_LOOPS 1000
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef unsigned char u8;
|
||||||
|
|
||||||
|
/*
 * Print the first len bytes of buf to stdout as hex values, each preceded by
 * a space.  A line break is emitted after every 32nd byte and once more after
 * the last byte.
 */
void dump(unsigned char *buf, int len)
{
	int n = 0;

	while (n < len) {
		printf(" %2x", 0xff & buf[n]);
		n++;
		if ((n % 32) == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print the first m bytes of each of the first k buffers referenced by s.
 * Every buffer goes on its own output line; a blank line ends the dump.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j;
|
||||||
|
void *buf;
|
||||||
|
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref;
|
||||||
|
u8 *temp_buff, *buffs[TEST_SOURCES];
|
||||||
|
struct perf start, stop;
|
||||||
|
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
temp_buff = buf;
|
||||||
|
|
||||||
|
// Performance test
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
memset(dest, 0, TEST_LEN);
|
||||||
|
memset(temp_buff, 0, TEST_LEN);
|
||||||
|
memset(dest_ref, 0, TEST_LEN);
|
||||||
|
memset(g, 0, TEST_SOURCES);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
g[i] = rand();
|
||||||
|
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++)
|
||||||
|
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||||
|
|
||||||
|
#ifdef DO_REF_PERF
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS; i++) {
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++)
|
||||||
|
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||||
|
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS; i++) {
|
||||||
|
for (j = 0; j < TEST_SOURCES; j++)
|
||||||
|
gf_vect_mul_init(g[j], &g_tbls[j * 32]);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, 25);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(dest, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("pass perf check\n");
|
||||||
|
return 0;
|
||||||
|
}
|
525
erasure/src/gf-vect-dot-prod-sse-test.c
Normal file
525
erasure/src/gf-vect-dot-prod-sse-test.c
Normal file
@ -0,0 +1,525 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/types.h"
|
||||||
|
|
||||||
|
#ifndef FUNCTION_UNDER_TEST
|
||||||
|
# define FUNCTION_UNDER_TEST gf_vect_dot_prod_sse
|
||||||
|
#endif
|
||||||
|
#ifndef TEST_MIN_SIZE
|
||||||
|
# define TEST_MIN_SIZE 16
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define str(s) #s
|
||||||
|
#define xstr(s) str(s)
|
||||||
|
|
||||||
|
#define TEST_LEN 8192
|
||||||
|
#define TEST_SIZE (TEST_LEN/2)
|
||||||
|
|
||||||
|
#ifndef TEST_SOURCES
|
||||||
|
# define TEST_SOURCES 16
|
||||||
|
#endif
|
||||||
|
#ifndef RANDOMS
|
||||||
|
# define RANDOMS 20
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define MMAX TEST_SOURCES
|
||||||
|
#define KMAX TEST_SOURCES
|
||||||
|
|
||||||
|
#ifdef EC_ALIGNED_ADDR
|
||||||
|
// Define power of 2 range to check ptr, len alignment
|
||||||
|
# define PTR_ALIGN_CHK_B 0
|
||||||
|
# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
|
||||||
|
#else
|
||||||
|
// Define power of 2 range to check ptr, len alignment
|
||||||
|
# define PTR_ALIGN_CHK_B 32
|
||||||
|
# define LEN_ALIGN_CHK_B 32 // 0 for aligned only
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef unsigned char u8;
|
||||||
|
|
||||||
|
/*
 * Hex-dump len bytes starting at buf to stdout.  Each byte is printed with a
 * leading space; the line is broken after every 32 bytes and a final newline
 * always follows.
 */
void dump(unsigned char *buf, int len)
{
	int done;

	for (done = 1; done <= len; done++) {
		printf(" %2x", 0xff & buf[done - 1]);
		if (done % 32 == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Hex-dump the leading m bytes of buffers s[0] .. s[k-1] to stdout, one
 * buffer per line, then print an empty line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row, col;

	for (row = 0; row != k && row < k; row++) {
		for (col = 0; col < m; ++col)
			printf(" %2x", s[row][col]);
		printf("\n");
	}

	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Hex-dump a k x m byte matrix stored row after row in the flat array s.
 * Each matrix row is printed on its own line, followed by an empty line.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col;

		for (col = 0; col < m; col++)
			printf(" %2x", 0xff & s[(row * m) + col]);
		printf("\n");
		row++;
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j, rtest, srcs, m, k, nerrs, r, err;
|
||||||
|
void *buf;
|
||||||
|
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||||
|
u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
|
||||||
|
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
|
||||||
|
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||||
|
|
||||||
|
int align, size;
|
||||||
|
unsigned char *efence_buffs[TEST_SOURCES];
|
||||||
|
unsigned int offset;
|
||||||
|
u8 *ubuffs[TEST_SOURCES];
|
||||||
|
u8 *udest_ptr;
|
||||||
|
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
temp_buff = buf;
|
||||||
|
|
||||||
|
// Test of all zeros
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
memset(buffs[i], 0, TEST_LEN);
|
||||||
|
|
||||||
|
memset(dest, 0, TEST_LEN);
|
||||||
|
memset(temp_buff, 0, TEST_LEN);
|
||||||
|
memset(dest_ref, 0, TEST_LEN);
|
||||||
|
memset(g, 0, TEST_SOURCES);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " \n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, 25);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(dest, 25);
|
||||||
|
return -1;
|
||||||
|
} else
|
||||||
|
putchar('.');
|
||||||
|
|
||||||
|
// Rand data test
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
g[i] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 1\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, 25);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(dest, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rand data test with varied parameters
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
g[i] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref);
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n");
|
||||||
|
dump_matrix(buffs, 5, srcs);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, 5);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(dest, 5);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test erasure code using gf_vect_dot_prod
|
||||||
|
|
||||||
|
// Pick a first test
|
||||||
|
m = 9;
|
||||||
|
k = 5;
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
gf_gen_rs_matrix(a, m, k);
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
for (i = k; i < m; i++) {
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||||
|
#ifndef USEREF
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||||
|
#else
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Random buffers in erasure
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||||
|
err = 1 & rand();
|
||||||
|
src_in_err[i] = err;
|
||||||
|
if (err)
|
||||||
|
src_err_list[nerrs++] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
// construct b by removing error rows
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r]) {
|
||||||
|
r++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
b[k * i + j] = a[k * r + j];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||||
|
printf("BAD MATRIX\n");
|
||||||
|
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r]) {
|
||||||
|
r++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
recov[i] = buffs[r];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||||
|
#ifndef USEREF
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||||
|
#else
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buff, 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do more random tests
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
while ((m = (rand() % MMAX)) < 2) ;
|
||||||
|
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||||
|
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
gf_gen_rs_matrix(a, m, k);
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
for (i = k; i < m; i++) {
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||||
|
#ifndef USEREF
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||||
|
#else
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Random errors
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||||
|
err = 1 & rand();
|
||||||
|
src_in_err[i] = err;
|
||||||
|
if (err)
|
||||||
|
src_err_list[nerrs++] = i;
|
||||||
|
}
|
||||||
|
if (nerrs == 0) { // should have at least one error
|
||||||
|
while ((err = (rand() % KMAX)) >= k) ;
|
||||||
|
src_err_list[nerrs++] = err;
|
||||||
|
src_in_err[err] = 1;
|
||||||
|
}
|
||||||
|
// construct b by removing error rows
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r]) {
|
||||||
|
r++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
b[k * i + j] = a[k * r + j];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||||
|
printf("BAD MATRIX\n");
|
||||||
|
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r]) {
|
||||||
|
r++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
recov[i] = buffs[r];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||||
|
#ifndef USEREF
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||||
|
#else
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
||||||
|
#endif
|
||||||
|
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (i = 0; i < nerrs; i++)
|
||||||
|
printf(" %d", src_err_list[i]);
|
||||||
|
printf("\na:\n");
|
||||||
|
dump_u8xu8((u8 *) a, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((u8 *) d, k, k);
|
||||||
|
printf("orig data:\n");
|
||||||
|
dump_matrix(buffs, m, 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buff, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run tests at end of buffer for Electric Fence
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
|
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||||
|
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
g[i] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref);
|
||||||
|
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref, dest, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n");
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, align);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(dest, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test rand ptr alignment if available
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||||
|
srcs = rand() % TEST_SOURCES;
|
||||||
|
if (srcs == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||||
|
// Add random offsets
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
|
||||||
|
udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
|
||||||
|
memset(dest, 0, TEST_LEN); // zero pad to check write-over
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
ubuffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
g[i] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr);
|
||||||
|
|
||||||
|
if (memcmp(dest_ref, udest_ptr, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, 25);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(udest_ptr, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Confirm that padding around dests is unchanged
|
||||||
|
memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||||
|
offset = udest_ptr - dest;
|
||||||
|
|
||||||
|
if (memcmp(dest, dest_ref, offset)) {
|
||||||
|
printf("Fail rand ualign pad start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test all size alignment
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
|
|
||||||
|
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||||
|
srcs = TEST_SOURCES;
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
g[i] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest);
|
||||||
|
|
||||||
|
if (memcmp(dest_ref, dest, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, 25);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(dest, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("done all: Pass\n");
|
||||||
|
return 0;
|
||||||
|
}
|
195
erasure/src/gf-vect-dot-prod-sse.asm
Normal file
195
erasure/src/gf-vect-dot-prod-sse.asm
Normal file
@ -0,0 +1,195 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_vect_dot_prod_sse(len, vec, *g_tbls, **buffs, *dest);
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 r9
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define func(x) x:
|
||||||
|
%define FUNC_SAVE
|
||||||
|
%define FUNC_RESTORE
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
%define arg0 rcx
|
||||||
|
%define arg1 rdx
|
||||||
|
%define arg2 r8
|
||||||
|
%define arg3 r9
|
||||||
|
|
||||||
|
%define arg4 r12 ; must be saved and loaded
|
||||||
|
%define tmp r11
|
||||||
|
%define tmp2 r10
|
||||||
|
%define tmp3 rdi ; must be saved and loaded
|
||||||
|
%define return rax
|
||||||
|
%define PS 8
|
||||||
|
%define frame_size 2*8
|
||||||
|
%define arg(x) [rsp + frame_size + PS + PS*x]
|
||||||
|
|
||||||
|
%define func(x) proc_frame x
|
||||||
|
%macro FUNC_SAVE 0
|
||||||
|
rex_push_reg r12
|
||||||
|
push_reg rdi
|
||||||
|
end_prolog
|
||||||
|
mov arg4, arg(4)
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%macro FUNC_RESTORE 0
|
||||||
|
pop rdi
|
||||||
|
pop r12
|
||||||
|
%endmacro
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
%define len arg0
|
||||||
|
%define vec arg1
|
||||||
|
%define mul_array arg2
|
||||||
|
%define src arg3
|
||||||
|
%define dest arg4
|
||||||
|
|
||||||
|
%define vec_i tmp2
|
||||||
|
%define ptr tmp3
|
||||||
|
%define pos return
|
||||||
|
|
||||||
|
|
||||||
|
%ifndef EC_ALIGNED_ADDR
|
||||||
|
;;; Use Un-aligned load/store
|
||||||
|
%define XLDR movdqu
|
||||||
|
%define XSTR movdqu
|
||||||
|
%else
|
||||||
|
;;; Use aligned load/store: non-temporal unless NO_NT_LDST is defined
|
||||||
|
%ifdef NO_NT_LDST
|
||||||
|
%define XLDR movdqa
|
||||||
|
%define XSTR movdqa
|
||||||
|
%else
|
||||||
|
%define XLDR movntdqa
|
||||||
|
%define XSTR movntdq
|
||||||
|
%endif
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
default rel
|
||||||
|
|
||||||
|
[bits 64]
|
||||||
|
section .text
|
||||||
|
|
||||||
|
%define xmask0f xmm5
|
||||||
|
%define xgft_lo xmm4
|
||||||
|
%define xgft_hi xmm3
|
||||||
|
|
||||||
|
%define x0 xmm0
|
||||||
|
%define xtmpa xmm1
|
||||||
|
%define xp xmm2
|
||||||
|
|
||||||
|
align 16
|
||||||
|
;;; gf_vect_dot_prod_sse(len, vec, *g_tbls, **buffs, *dest)
;;; For every 16-byte chunk, looks up each source vector's bytes in that
;;; vector's 32-byte table (16 entries for the low nibble at +0, 16 entries
;;; for the high nibble at +16), XORs the per-vector results together and
;;; stores the 16-byte sum at the same offset in dest.
;;; Returns 0 on success, 1 when len is smaller than 16.
global gf_vect_dot_prod_sse:function
|
||||||
|
func(gf_vect_dot_prod_sse)
|
||||||
|
FUNC_SAVE
|
||||||
|
;;; From here on len holds (length - 16), i.e. the last valid chunk offset
sub len, 16
|
||||||
|
jl .return_fail
|
||||||
|
xor pos, pos
|
||||||
|
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||||
|
|
||||||
|
.loop16:
|
||||||
|
pxor xp, xp
|
||||||
|
mov tmp, mul_array
|
||||||
|
xor vec_i, vec_i
|
||||||
|
|
||||||
|
.next_vect:
|
||||||
|
mov ptr, [src+vec_i*PS]
|
||||||
|
movdqu xgft_lo, [tmp] ;Load array Cx{00}, Cx{01}, ..., Cx{0f}
|
||||||
|
movdqu xgft_hi, [tmp+16] ; " Cx{00}, Cx{10}, ..., Cx{f0}
|
||||||
|
XLDR x0, [ptr+pos] ;Get next source vector
|
||||||
|
add tmp, 32
|
||||||
|
add vec_i, 1
|
||||||
|
movdqa xtmpa, x0 ;Keep unshifted copy of src
|
||||||
|
psraw x0, 4 ;Shift to put high nibble into bits 3-0
|
||||||
|
pand x0, xmask0f ;Mask high src nibble in bits 3-0
|
||||||
|
pand xtmpa, xmask0f ;Mask low src nibble in bits 3-0
|
||||||
|
pshufb xgft_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
pshufb xgft_lo, xtmpa ;Lookup mul table of low nibble
|
||||||
|
pxor xgft_hi, xgft_lo ;GF add high and low partials
|
||||||
|
pxor xp, xgft_hi ;xp += partial
|
||||||
|
cmp vec_i, vec
|
||||||
|
jl .next_vect
|
||||||
|
|
||||||
|
XSTR [dest+pos], xp
|
||||||
|
|
||||||
|
add pos, 16 ;Loop on 16 bytes at a time
|
||||||
|
cmp pos, len
|
||||||
|
jle .loop16
|
||||||
|
|
||||||
|
;;; tmp = original length; pos equal to it means every byte has been written
lea tmp, [len + 16]
|
||||||
|
cmp pos, tmp
|
||||||
|
je .return_pass
|
||||||
|
|
||||||
|
;; Tail len
|
||||||
|
mov pos, len ;Overlapped offset length-16
|
||||||
|
jmp .loop16 ;Do one more overlap pass
|
||||||
|
|
||||||
|
.return_pass:
|
||||||
|
mov return, 0
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
.return_fail:
|
||||||
|
mov return, 1
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
endproc_frame
|
||||||
|
|
||||||
|
section .data
|
||||||
|
|
||||||
|
align 16
|
||||||
|
mask0f: ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||||
|
|
||||||
|
;;; slversion name, core, ver, snum
;;; Exports the labels <name>_slver and <name>_slver_<core><ver><snum>, both
;;; placed on a 4-byte record: the word 0x<snum> followed by the bytes
;;; 0x<ver> and 0x<core>.
%macro slversion 4
|
||||||
|
global %1_slver_%2%3%4
|
||||||
|
global %1_slver
|
||||||
|
%1_slver:
|
||||||
|
%1_slver_%2%3%4:
|
||||||
|
dw 0x%4
|
||||||
|
db 0x%3, 0x%2
|
||||||
|
%endmacro
|
||||||
|
;;; func core, ver, snum
|
||||||
|
slversion gf_vect_dot_prod_sse, 00, 03, 0060
|
525
erasure/src/gf-vect-dot-prod-test.c
Normal file
525
erasure/src/gf-vect-dot-prod-test.c
Normal file
@ -0,0 +1,525 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset, memcmp
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/types.h"
|
||||||
|
|
||||||
|
/* Stringification helpers: xstr() expands its argument before quoting it. */
#define str(s) #s
#define xstr(s) str(s)

/* Routine exercised by this test; override with -DFUNCTION_UNDER_TEST=... */
#if !defined(FUNCTION_UNDER_TEST)
# define FUNCTION_UNDER_TEST gf_vect_dot_prod
#endif

/* Smallest vector length used by the size-sweep tests. */
#if !defined(TEST_MIN_SIZE)
# define TEST_MIN_SIZE 32
#endif

/* Bytes allocated per buffer, and the largest length used near the buffer end. */
#define TEST_LEN 8192
#define TEST_SIZE (TEST_LEN/2)

/* Number of source vectors. */
#if !defined(TEST_SOURCES)
# define TEST_SOURCES 16
#endif

/* Iteration count for each randomized test. */
#if !defined(RANDOMS)
# define RANDOMS 20
#endif

/* Upper bounds on the erasure-code matrix dimensions m and k. */
#define MMAX TEST_SOURCES
#define KMAX TEST_SOURCES

/* Power-of-2 range used to vary pointer and length alignment. */
#if defined(EC_ALIGNED_ADDR)
# define PTR_ALIGN_CHK_B 0
# define LEN_ALIGN_CHK_B 0	/* 0 for aligned only */
#else
# define PTR_ALIGN_CHK_B 32
# define LEN_ALIGN_CHK_B 32	/* 0 for aligned only */
#endif

typedef unsigned char u8;
|
||||||
|
|
||||||
|
/*
 * Print the first len bytes of buf to stdout as hex, 32 values per row,
 * followed by a terminating newline.
 */
void dump(unsigned char *buf, int len)
{
	int pos = 0;

	while (pos < len) {
		printf(" %2x", buf[pos] & 0xff);
		pos++;
		/* Break the row after every 32nd value. */
		if ((pos % 32) == 0)
			printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print the first m bytes of each of the first k buffers in s, one buffer
 * per output row, followed by a blank line.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		const unsigned char *line = s[row];

		for (col = 0; col < m; col++)
			printf(" %2x", line[col]);
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Print a k-row by m-column byte matrix stored contiguously in row-major
 * order, one matrix row per output line, followed by a blank line.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		int base = row * m;	/* index of the first element of this row */

		for (col = 0; col < m; col++)
			printf(" %2x", s[base + col] & 0xff);
		printf("\n");
	}
	printf("\n");
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i, j, rtest, srcs, m, k, nerrs, r, err;
|
||||||
|
void *buf;
|
||||||
|
u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES];
|
||||||
|
u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES];
|
||||||
|
u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX];
|
||||||
|
u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES];
|
||||||
|
|
||||||
|
int align, size;
|
||||||
|
unsigned char *efence_buffs[TEST_SOURCES];
|
||||||
|
unsigned int offset;
|
||||||
|
u8 *ubuffs[TEST_SOURCES];
|
||||||
|
u8 *udest_ptr;
|
||||||
|
|
||||||
|
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN);
|
||||||
|
|
||||||
|
// Allocate the arrays
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) {
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
buffs[i] = buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
dest_ref = buf;
|
||||||
|
|
||||||
|
if (posix_memalign(&buf, 64, TEST_LEN)) {
|
||||||
|
printf("alloc error: Fail");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
temp_buff = buf;
|
||||||
|
|
||||||
|
// Test of all zeros
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
memset(buffs[i], 0, TEST_LEN);
|
||||||
|
|
||||||
|
memset(dest, 0, TEST_LEN);
|
||||||
|
memset(temp_buff, 0, TEST_LEN);
|
||||||
|
memset(dest_ref, 0, TEST_LEN);
|
||||||
|
memset(g, 0, TEST_SOURCES);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||||
|
printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " \n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, 25);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(dest, 25);
|
||||||
|
return -1;
|
||||||
|
} else
|
||||||
|
putchar('.');
|
||||||
|
|
||||||
|
// Rand data test
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
g[i] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref);
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 1\n");
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, 25);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(dest, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rand data test with varied parameters
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
for (srcs = TEST_SOURCES; srcs > 0; srcs--) {
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
g[i] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref);
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref, dest, TEST_LEN)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n");
|
||||||
|
dump_matrix(buffs, 5, srcs);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, 5);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(dest, 5);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test erasure code using gf_vect_dot_prod
|
||||||
|
|
||||||
|
// Pick a first test
|
||||||
|
m = 9;
|
||||||
|
k = 5;
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
gf_gen_rs_matrix(a, m, k);
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
for (i = k; i < m; i++) {
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||||
|
#ifndef USEREF
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||||
|
#else
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Random buffers in erasure
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||||
|
err = 1 & rand();
|
||||||
|
src_in_err[i] = err;
|
||||||
|
if (err)
|
||||||
|
src_err_list[nerrs++] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
// construct b by removing error rows
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r]) {
|
||||||
|
r++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
b[k * i + j] = a[k * r + j];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||||
|
printf("BAD MATRIX\n");
|
||||||
|
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r]) {
|
||||||
|
r++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
recov[i] = buffs[r];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||||
|
#ifndef USEREF
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||||
|
#else
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buff, 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do more random tests
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
while ((m = (rand() % MMAX)) < 2) ;
|
||||||
|
while ((k = (rand() % KMAX)) >= m || k < 1) ;
|
||||||
|
|
||||||
|
if (m > MMAX || k > KMAX)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
gf_gen_rs_matrix(a, m, k);
|
||||||
|
|
||||||
|
// Make random data
|
||||||
|
for (i = 0; i < k; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
// Make parity vects
|
||||||
|
for (i = k; i < m; i++) {
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]);
|
||||||
|
#ifndef USEREF
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]);
|
||||||
|
#else
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Random errors
|
||||||
|
memset(src_in_err, 0, TEST_SOURCES);
|
||||||
|
for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) {
|
||||||
|
err = 1 & rand();
|
||||||
|
src_in_err[i] = err;
|
||||||
|
if (err)
|
||||||
|
src_err_list[nerrs++] = i;
|
||||||
|
}
|
||||||
|
if (nerrs == 0) { // should have at least one error
|
||||||
|
while ((err = (rand() % KMAX)) >= k) ;
|
||||||
|
src_err_list[nerrs++] = err;
|
||||||
|
src_in_err[err] = 1;
|
||||||
|
}
|
||||||
|
// construct b by removing error rows
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r]) {
|
||||||
|
r++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
b[k * i + j] = a[k * r + j];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0)
|
||||||
|
printf("BAD MATRIX\n");
|
||||||
|
|
||||||
|
for (i = 0, r = 0; i < k; i++, r++) {
|
||||||
|
while (src_in_err[r]) {
|
||||||
|
r++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
recov[i] = buffs[r];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recover data
|
||||||
|
for (i = 0; i < nerrs; i++) {
|
||||||
|
for (j = 0; j < k; j++)
|
||||||
|
gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]);
|
||||||
|
#ifndef USEREF
|
||||||
|
FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff);
|
||||||
|
#else
|
||||||
|
gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff);
|
||||||
|
#endif
|
||||||
|
if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) {
|
||||||
|
printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs);
|
||||||
|
printf(" - erase list = ");
|
||||||
|
for (i = 0; i < nerrs; i++)
|
||||||
|
printf(" %d", src_err_list[i]);
|
||||||
|
printf("\na:\n");
|
||||||
|
dump_u8xu8((u8 *) a, m, k);
|
||||||
|
printf("inv b:\n");
|
||||||
|
dump_u8xu8((u8 *) d, k, k);
|
||||||
|
printf("orig data:\n");
|
||||||
|
dump_matrix(buffs, m, 25);
|
||||||
|
printf("orig :");
|
||||||
|
dump(buffs[src_err_list[i]], 25);
|
||||||
|
printf("recov %d:", src_err_list[i]);
|
||||||
|
dump(temp_buff, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run tests at end of buffer for Electric Fence
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
|
for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) {
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
for (j = 0; j < TEST_LEN; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end
|
||||||
|
efence_buffs[i] = buffs[i] + TEST_LEN - size;
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
g[i] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SOURCES; i++)
|
||||||
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref);
|
||||||
|
FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest);
|
||||||
|
|
||||||
|
if (0 != memcmp(dest_ref, dest, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n");
|
||||||
|
dump_matrix(efence_buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, align);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(dest, align);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test rand ptr alignment if available
|
||||||
|
|
||||||
|
for (rtest = 0; rtest < RANDOMS; rtest++) {
|
||||||
|
size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1);
|
||||||
|
srcs = rand() % TEST_SOURCES;
|
||||||
|
if (srcs == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B;
|
||||||
|
// Add random offsets
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
|
||||||
|
udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset));
|
||||||
|
|
||||||
|
memset(dest, 0, TEST_LEN); // zero pad to check write-over
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
ubuffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
g[i] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr);
|
||||||
|
|
||||||
|
if (memcmp(dest_ref, udest_ptr, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n",
|
||||||
|
srcs);
|
||||||
|
dump_matrix(ubuffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, 25);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(udest_ptr, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Confirm that padding around dests is unchanged
|
||||||
|
memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff
|
||||||
|
offset = udest_ptr - dest;
|
||||||
|
|
||||||
|
if (memcmp(dest, dest_ref, offset)) {
|
||||||
|
printf("Fail rand ualign pad start\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) {
|
||||||
|
printf("Fail rand ualign pad end\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test all size alignment
|
||||||
|
align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16;
|
||||||
|
|
||||||
|
for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) {
|
||||||
|
srcs = TEST_SOURCES;
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
for (j = 0; j < size; j++)
|
||||||
|
buffs[i][j] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
g[i] = rand();
|
||||||
|
|
||||||
|
for (i = 0; i < srcs; i++)
|
||||||
|
gf_vect_mul_init(g[i], &g_tbls[i * 32]);
|
||||||
|
|
||||||
|
gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref);
|
||||||
|
|
||||||
|
FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest);
|
||||||
|
|
||||||
|
if (memcmp(dest_ref, dest, size)) {
|
||||||
|
printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign len=%d\n",
|
||||||
|
size);
|
||||||
|
dump_matrix(buffs, 5, TEST_SOURCES);
|
||||||
|
printf("dprod_base:");
|
||||||
|
dump(dest_ref, 25);
|
||||||
|
printf("dprod:");
|
||||||
|
dump(dest, 25);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("done all: Pass\n");
|
||||||
|
return 0;
|
||||||
|
}
|
99
erasure/src/gf-vect-mul-avx-perf.c
Normal file
99
erasure/src/gf-vect-mul-avx-perf.c
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/tests.h"
|
||||||
|
|
||||||
|
/*
 * Benchmark configuration.
 *
 * Uncomment CACHED_TEST for a "warm" run that loops many times over a small
 * buffer.  Otherwise a "cold" run over a buffer sized from GT_L3_CACHE is
 * used, unless TEST_CUSTOM is defined, in which case the build must supply
 * TEST_LEN (and may supply TEST_LOOPS).
 *
 * Every macro that expands to an expression is parenthesized so it keeps
 * its value when used next to other operators.
 */
//#define CACHED_TEST
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_LEN     (8*1024)
# define TEST_LOOPS   4000000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES 10
#  define GT_L3_CACHE  (32*1024*1024)	/* some number > last level cache */
#  define TEST_LEN     (GT_L3_CACHE / 2)
#  define TEST_LOOPS   1000
#  define TEST_TYPE_STR "_cold"
# else
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#    define TEST_LOOPS 1000
#  endif
# endif
#endif

#define TEST_MEM (2 * TEST_LEN)

typedef unsigned char u8;
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
u8 *buff1, *buff2, gf_const_tbl[64], a = 2;
|
||||||
|
struct perf start, stop;
|
||||||
|
|
||||||
|
printf("gf_vect_mul_avx_perf:\n");
|
||||||
|
|
||||||
|
gf_vect_mul_init(a, gf_const_tbl);
|
||||||
|
|
||||||
|
// Allocate large mem region
|
||||||
|
buff1 = (u8 *) malloc(TEST_LEN);
|
||||||
|
buff2 = (u8 *) malloc(TEST_LEN);
|
||||||
|
if (NULL == buff1 || NULL == buff2) {
|
||||||
|
printf("Failed to allocate %dB\n", TEST_LEN);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(buff1, 0, TEST_LEN);
|
||||||
|
memset(buff2, 0, TEST_LEN);
|
||||||
|
|
||||||
|
gf_vect_mul_avx(TEST_LEN, gf_const_tbl, buff1, buff2);
|
||||||
|
|
||||||
|
printf("Start timed tests\n");
|
||||||
|
fflush(0);
|
||||||
|
|
||||||
|
gf_vect_mul_avx(TEST_LEN, gf_const_tbl, buff1, buff2);
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS; i++) {
|
||||||
|
gf_vect_mul_init(a, gf_const_tbl);
|
||||||
|
gf_vect_mul_avx(TEST_LEN, gf_const_tbl, buff1, buff2);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf("gf_vect_mul_avx" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * i);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
143
erasure/src/gf-vect-mul-avx-test.c
Normal file
143
erasure/src/gf-vect-mul-avx-test.c
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset
|
||||||
|
#include "erasure-code.h"
|
||||||
|
|
||||||
|
/* Test configuration: size in bytes of each of the three test buffers. */
#define TEST_SIZE 8192

/* Remaining settings; not referenced by main() in this file. */
#define TEST_MEM TEST_SIZE
#define TEST_LOOPS 100000
#define TEST_TYPE_STR ""

/* Byte type used for GF(2^8) elements and buffers. */
typedef unsigned char u8;
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
|
||||||
|
int align, size;
|
||||||
|
unsigned char *efence_buff1;
|
||||||
|
unsigned char *efence_buff2;
|
||||||
|
unsigned char *efence_buff3;
|
||||||
|
|
||||||
|
printf("gf_vect_mul_avx:\n");
|
||||||
|
|
||||||
|
gf_vect_mul_init(a, gf_const_tbl);
|
||||||
|
|
||||||
|
buff1 = (u8 *) malloc(TEST_SIZE);
|
||||||
|
buff2 = (u8 *) malloc(TEST_SIZE);
|
||||||
|
buff3 = (u8 *) malloc(TEST_SIZE);
|
||||||
|
|
||||||
|
if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
|
||||||
|
printf("buffer alloc error\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Fill with rand data
|
||||||
|
for (i = 0; i < TEST_SIZE; i++)
|
||||||
|
buff1[i] = rand();
|
||||||
|
|
||||||
|
gf_vect_mul_avx(TEST_SIZE, gf_const_tbl, buff1, buff2);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SIZE; i++)
|
||||||
|
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||||
|
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
|
||||||
|
gf_mul(2, buff1[i]));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3);
|
||||||
|
|
||||||
|
// Check reference function
|
||||||
|
for (i = 0; i < TEST_SIZE; i++)
|
||||||
|
if (buff2[i] != buff3[i]) {
|
||||||
|
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
|
||||||
|
i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SIZE; i++)
|
||||||
|
buff1[i] = rand();
|
||||||
|
|
||||||
|
// Check each possible constant
|
||||||
|
printf("Random tests ");
|
||||||
|
for (a = 0; a != 255; a++) {
|
||||||
|
gf_vect_mul_init(a, gf_const_tbl);
|
||||||
|
gf_vect_mul_avx(TEST_SIZE, gf_const_tbl, buff1, buff2);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SIZE; i++)
|
||||||
|
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||||
|
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
|
||||||
|
i, a, buff1[i], buff2[i], gf_mul(2, buff1[i]));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run tests at end of buffer for Electric Fence
|
||||||
|
align = 32;
|
||||||
|
a = 2;
|
||||||
|
|
||||||
|
gf_vect_mul_init(a, gf_const_tbl);
|
||||||
|
for (size = 0; size < TEST_SIZE; size += align) {
|
||||||
|
// Line up TEST_SIZE from end
|
||||||
|
efence_buff1 = buff1 + size;
|
||||||
|
efence_buff2 = buff2 + size;
|
||||||
|
efence_buff3 = buff3 + size;
|
||||||
|
|
||||||
|
gf_vect_mul_avx(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SIZE - size; i++)
|
||||||
|
if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
|
||||||
|
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
|
||||||
|
i, efence_buff1[i], efence_buff2[i], gf_mul(2,
|
||||||
|
efence_buff1
|
||||||
|
[i]));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3);
|
||||||
|
|
||||||
|
// Check reference function
|
||||||
|
for (i = 0; i < TEST_SIZE - size; i++)
|
||||||
|
if (efence_buff2[i] != efence_buff3[i]) {
|
||||||
|
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
|
||||||
|
i, a, efence_buff2[i], efence_buff3[i], gf_mul(2,
|
||||||
|
efence_buff1
|
||||||
|
[i]));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
printf(" done: Pass\n");
|
||||||
|
return 0;
|
||||||
|
}
|
172
erasure/src/gf-vect-mul-avx.asm
Normal file
172
erasure/src/gf-vect-mul-avx.asm
Normal file
@ -0,0 +1,172 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_vect_mul_avx(len, mul_array, src, dest)
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
;;; Calling-convention glue, selected by the assembler output format.
;;; Each branch defines arg0..argN, the return register, func(x) and the
;;; FUNC_SAVE / FUNC_RESTORE prologue/epilogue helpers used by the routine below.
;;; elf64 branch: arguments arrive in rdi, rsi, rdx, rcx, r8, r9 and
;;; FUNC_SAVE / FUNC_RESTORE expand to nothing.
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
%define tmp r11
|
||||||
|
%define return rax
|
||||||
|
%define func(x) x:
|
||||||
|
%define FUNC_SAVE
|
||||||
|
%define FUNC_RESTORE
|
||||||
|
|
||||||
|
;;; win64 branch: arguments arrive in rcx, rdx, r8, r9. The routine uses
;;; xmm6, xmm7 and xmm13-xmm15, which are non-volatile in the Windows x64
;;; calling convention, so they are spilled to the stack and reloaded.
%elifidn __OUTPUT_FORMAT__, win64
|
||||||
|
%define arg0 rcx
|
||||||
|
%define arg1 rdx
|
||||||
|
%define arg2 r8
|
||||||
|
%define arg3 r9
|
||||||
|
%define return rax
|
||||||
|
;;; Five 16-byte save slots plus 8 bytes; presumably the extra 8 re-aligns rsp
;;; to 16 bytes after the return-address push, as the aligned moves in
;;; FUNC_RESTORE require -- TODO confirm against alloc_stack's definition.
%define stack_size 5*16 + 8 ; must be an odd multiple of 8
|
||||||
|
%define func(x) proc_frame x
|
||||||
|
;;; Prologue: reserve stack_size bytes and save the five xmm registers at
;;; offsets 0, 16, 32, 48 and 64 from rsp.
%macro FUNC_SAVE 0
|
||||||
|
alloc_stack stack_size
|
||||||
|
save_xmm128 xmm6, 0*16
|
||||||
|
save_xmm128 xmm7, 1*16
|
||||||
|
save_xmm128 xmm13, 2*16
|
||||||
|
save_xmm128 xmm14, 3*16
|
||||||
|
save_xmm128 xmm15, 4*16
|
||||||
|
end_prolog
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
;;; Epilogue: reload the registers from the same slots, then release the
;;; stack area. Note that the final add modifies the flags.
%macro FUNC_RESTORE 0
|
||||||
|
vmovdqa xmm6, [rsp + 0*16]
|
||||||
|
vmovdqa xmm7, [rsp + 1*16]
|
||||||
|
vmovdqa xmm13, [rsp + 2*16]
|
||||||
|
vmovdqa xmm14, [rsp + 3*16]
|
||||||
|
vmovdqa xmm15, [rsp + 4*16]
|
||||||
|
add rsp, stack_size
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
;;; Symbolic names for the four arguments of gf_vect_mul_avx(len, mul_array, src, dest).
%define len arg0
|
||||||
|
%define mul_array arg1
|
||||||
|
%define src arg2
|
||||||
|
%define dest arg3
|
||||||
|
;;; pos (the running byte offset) lives in the return register, so whatever
;;; is left in pos at ret is the function's return value.
%define pos return
|
||||||
|
|
||||||
|
|
||||||
|
;;; Use non-temporal load/store unless NO_NT_LDST is defined, in which case
;;; plain aligned moves are used instead.
|
||||||
|
%ifdef NO_NT_LDST
|
||||||
|
%define XLDR vmovdqa
|
||||||
|
%define XSTR vmovdqa
|
||||||
|
%else
|
||||||
|
%define XLDR vmovntdqa
|
||||||
|
%define XSTR vmovntdq
|
||||||
|
%endif
|
||||||
|
|
||||||
|
default rel
|
||||||
|
|
||||||
|
[bits 64]
|
||||||
|
section .text
|
||||||
|
|
||||||
|
;;; Loop-invariant registers: the 0x0f nibble mask and the two 16-entry
;;; lookup tables loaded from mul_array.
%define xmask0f xmm15
|
||||||
|
%define xgft_lo xmm14
|
||||||
|
%define xgft_hi xmm13
|
||||||
|
|
||||||
|
;;; Per-iteration registers: x0/xtmp1* process the first 16 bytes of each
;;; 32-byte step, x1/xtmp2* the second 16 bytes.
%define x0 xmm0
|
||||||
|
%define xtmp1a xmm1
|
||||||
|
%define xtmp1b xmm2
|
||||||
|
%define xtmp1c xmm3
|
||||||
|
%define x1 xmm4
|
||||||
|
%define xtmp2a xmm5
|
||||||
|
%define xtmp2b xmm6
|
||||||
|
%define xtmp2c xmm7
|
||||||
|
|
||||||
|
;;; gf_vect_mul_avx(len, mul_array, src, dest)
;;;
;;; For every source byte b, stores to dest the value
;;;   lo_table[b & 0x0f] XOR hi_table[b >> 4]
;;; where lo_table is the 16 bytes at [mul_array] and hi_table the 16 bytes
;;; at [mul_array+16].
;;;
;;; Processes 32 bytes per iteration. The loop is bottom-tested, so one
;;; 32-byte step is always executed before len is examined.
;;; Returns pos - len in the return register: 0 when len is a positive
;;; multiple of 32.
;;; XLDR/XSTR are aligned or non-temporal moves, so src and dest are
;;; presumably expected to be 16-byte aligned -- TODO confirm with callers.
align 16
|
||||||
|
global gf_vect_mul_avx:function
|
||||||
|
func(gf_vect_mul_avx)
|
||||||
|
FUNC_SAVE
|
||||||
|
mov pos, 0
|
||||||
|
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||||
|
vmovdqu xgft_lo, [mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||||
|
vmovdqu xgft_hi, [mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||||
|
|
||||||
|
loop32:
|
||||||
|
XLDR x0, [src+pos] ;Get next source vector
|
||||||
|
XLDR x1, [src+pos+16] ;Get next source vector + 16B ahead
|
||||||
|
add pos, 32 ;Advance 32 bytes per iteration (pos now points past this step)
|
||||||
|
;; The flags set here are consumed by the jl at the bottom of the loop; the
;; vector instructions in between do not modify the flags.
cmp pos, len
|
||||||
|
vpand xtmp1a, x0, xmask0f ;Mask low src nibble in bits 3-0
|
||||||
|
vpand xtmp2a, x1, xmask0f
|
||||||
|
vpsraw x0, x0, 4 ;Shift to put high nibble into bits 3-0
|
||||||
|
vpsraw x1, x1, 4
|
||||||
|
vpand x0, x0, xmask0f ;Mask high src nibble in bits 3-0 (drops bits shifted in from the neighbouring byte)
|
||||||
|
vpand x1, x1, xmask0f
|
||||||
|
vpshufb xtmp1b, xgft_hi, x0 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xtmp1c, xgft_lo, xtmp1a ;Lookup mul table of low nibble
|
||||||
|
vpshufb xtmp2b, xgft_hi, x1 ;Lookup mul table of high nibble
|
||||||
|
vpshufb xtmp2c, xgft_lo, xtmp2a ;Lookup mul table of low nibble
|
||||||
|
vpxor xtmp1b, xtmp1b, xtmp1c ;GF add high and low partials
|
||||||
|
vpxor xtmp2b, xtmp2b, xtmp2c
|
||||||
|
XSTR [dest+pos-32], xtmp1b ;Store result (pos was already advanced, hence -32)
|
||||||
|
XSTR [dest+pos-16], xtmp2b ;Store +16B result
|
||||||
|
jl loop32
|
||||||
|
|
||||||
|
|
||||||
|
;; Normal exit: return value is pos - len.
return_pass:
|
||||||
|
FUNC_RESTORE
|
||||||
|
sub pos, len
|
||||||
|
ret
|
||||||
|
|
||||||
|
;; Returns 1. NOTE(review): no branch to this label is visible in this routine.
return_fail:
|
||||||
|
FUNC_RESTORE
|
||||||
|
mov return, 1
|
||||||
|
ret
|
||||||
|
|
||||||
|
endproc_frame
|
||||||
|
|
||||||
|
section .data
|
||||||
|
|
||||||
|
align 16
|
||||||
|
|
||||||
|
;;; 16 bytes of 0x0f: selects the low nibble of every byte in a vector.
;;; Loaded with an aligned move, hence the align 16 above.
mask0f:
|
||||||
|
ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||||
|
|
||||||
|
;;; slversion name, a, b, c
;;; Exports the labels <name>_slver and <name>_slver_<a><b><c>, both pointing
;;; at four bytes of data: the word 0x<c> followed by the bytes 0x<b>, 0x<a>.
%macro slversion 4
|
||||||
|
global %1_slver_%2%3%4
|
||||||
|
global %1_slver
|
||||||
|
%1_slver:
|
||||||
|
%1_slver_%2%3%4:
|
||||||
|
dw 0x%4
|
||||||
|
db 0x%3, 0x%2
|
||||||
|
%endmacro
|
||||||
|
;;; func, core, ver, snum
|
||||||
|
slversion gf_vect_mul_avx, 01, 02, 0036
|
129
erasure/src/gf-vect-mul-base-test.c
Normal file
129
erasure/src/gf-vect-mul-base-test.c
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset
|
||||||
|
#include "erasure-code.h"
|
||||||
|
|
||||||
|
#define TEST_SIZE 8192
#define TEST_MEM TEST_SIZE
#define TEST_LOOPS 100000
#define TEST_TYPE_STR ""

typedef unsigned char u8;

/*
 * Functional test for gf_vect_mul_base().
 *
 * Phases:
 *   1. Multiply a random buffer by the constant 2 and compare every output
 *      byte with gf_mul().
 *   2. Run gf_vect_mul_base() a second time into buff3 and compare the two
 *      outputs.
 *   3. Repeat the gf_mul() comparison for every constant 0..255.
 *   4. Advance the start of the buffers in 32-byte steps so that each run
 *      ends exactly at the end of the allocation (for Electric Fence).
 *
 * Returns 0 when every check passes, 1 on the first mismatch and -1 when a
 * buffer cannot be allocated.
 */
int main(int argc, char *argv[])
{
	int i, c;
	int ret = 1;		// mismatch exit code; set to 0 only after all phases pass
	u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
	int align, size;
	unsigned char *efence_buff1;
	unsigned char *efence_buff2;

	printf("gf_vect_mul_base_test:\n");

	gf_vect_mul_init(a, gf_const_tbl);

	buff1 = malloc(TEST_SIZE);
	buff2 = malloc(TEST_SIZE);
	buff3 = malloc(TEST_SIZE);

	if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
		printf("buffer alloc error\n");
		ret = -1;
		goto out;
	}
	// Fill with rand data
	for (i = 0; i < TEST_SIZE; i++)
		buff1[i] = rand();

	gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2);

	for (i = 0; i < TEST_SIZE; i++) {
		if (gf_mul(a, buff1[i]) != buff2[i]) {
			printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
			       gf_mul(2, buff1[i]));
			goto out;
		}
	}

	gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3);

	// Check reference function
	for (i = 0; i < TEST_SIZE; i++) {
		if (buff2[i] != buff3[i]) {
			printf("fail at %d, 0x%x x 0x%x = 0x%x (0x%x)\n",
			       i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
			goto out;
		}
	}

	for (i = 0; i < TEST_SIZE; i++)
		buff1[i] = rand();

	// Check each possible constant.  The counter is an int because a u8
	// counter cannot express the terminating value 256, which is why a
	// u8-based loop would have to stop before testing 255.
	printf("Random tests ");
	for (c = 0; c < 256; c++) {
		a = (u8) c;
		gf_vect_mul_init(a, gf_const_tbl);
		gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff2);

		for (i = 0; i < TEST_SIZE; i++) {
			if (gf_mul(a, buff1[i]) != buff2[i]) {
				// Report the expected product for the constant under test
				printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
				       i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
				goto out;
			}
		}
		putchar('.');
	}

	// Run tests at end of buffer for Electric Fence
	align = 32;
	a = 2;

	gf_vect_mul_init(a, gf_const_tbl);
	for (size = 0; size < TEST_SIZE; size += align) {
		// Line up TEST_SIZE from end
		efence_buff1 = buff1 + size;
		efence_buff2 = buff2 + size;

		gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);

		for (i = 0; i < TEST_SIZE - size; i++) {
			if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
				printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
				       i, efence_buff1[i], efence_buff2[i],
				       gf_mul(2, efence_buff1[i]));
				goto out;
			}
		}

		putchar('.');
	}

	printf(" done: Pass\n");
	ret = 0;

out:
	// free(NULL) is a no-op, so this is safe after a partial allocation failure
	free(buff1);
	free(buff2);
	free(buff3);
	return ret;
}
|
99
erasure/src/gf-vect-mul-perf.c
Normal file
99
erasure/src/gf-vect-mul-perf.c
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/tests.h"
|
||||||
|
|
||||||
|
//#define CACHED_TEST
|
||||||
|
// Test sizing.  Every expression macro is fully parenthesized so that it
// behaves as a single value in any surrounding expression
// (e.g. x % TEST_LEN or a cast applied to TEST_LEN).
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_LEN     (8*1024)
# define TEST_LOOPS   4000000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES 10
#  define GT_L3_CACHE  (32*1024*1024)	/* some number > last level cache */
#  define TEST_LEN     (GT_L3_CACHE / 2)
#  define TEST_LOOPS   1000
#  define TEST_TYPE_STR "_cold"
# else
// Custom test: TEST_LEN (and optionally TEST_LOOPS) come from the build line
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#    define TEST_LOOPS 1000
#  endif
# endif
#endif

#define TEST_MEM (2 * TEST_LEN)
|
||||||
|
|
||||||
|
typedef unsigned char u8;
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
u8 *buff1, *buff2, gf_const_tbl[64], a = 2;
|
||||||
|
struct perf start, stop;
|
||||||
|
|
||||||
|
printf("gf_vect_mul_perf:\n");
|
||||||
|
|
||||||
|
gf_vect_mul_init(a, gf_const_tbl);
|
||||||
|
|
||||||
|
// Allocate large mem region
|
||||||
|
buff1 = (u8 *) malloc(TEST_LEN);
|
||||||
|
buff2 = (u8 *) malloc(TEST_LEN);
|
||||||
|
if (NULL == buff1 || NULL == buff2) {
|
||||||
|
printf("Failed to allocate %dB\n", TEST_LEN);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(buff1, 0, TEST_LEN);
|
||||||
|
memset(buff2, 0, TEST_LEN);
|
||||||
|
|
||||||
|
gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
|
||||||
|
|
||||||
|
printf("Start timed tests\n");
|
||||||
|
fflush(0);
|
||||||
|
|
||||||
|
gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS; i++) {
|
||||||
|
gf_vect_mul_init(a, gf_const_tbl);
|
||||||
|
gf_vect_mul(TEST_LEN, gf_const_tbl, buff1, buff2);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf("gf_vect_mul" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * i);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
97
erasure/src/gf-vect-mul-sse-perf.c
Normal file
97
erasure/src/gf-vect-mul-sse-perf.c
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset
|
||||||
|
#include "erasure-code.h"
|
||||||
|
#include "erasure/tests.h"
|
||||||
|
|
||||||
|
//#define CACHED_TEST
|
||||||
|
// Test sizing.  Every expression macro is fully parenthesized so that it
// behaves as a single value in any surrounding expression
// (e.g. x % TEST_LEN or a cast applied to TEST_LEN).
#ifdef CACHED_TEST
// Cached test, loop many times over small dataset
# define TEST_LEN     (8*1024)
# define TEST_LOOPS   4000000
# define TEST_TYPE_STR "_warm"
#else
# ifndef TEST_CUSTOM
// Uncached test.  Pull from large mem base.
#  define TEST_SOURCES 10
#  define GT_L3_CACHE  (32*1024*1024)	/* some number > last level cache */
#  define TEST_LEN     (GT_L3_CACHE / 2)
#  define TEST_LOOPS   1000
#  define TEST_TYPE_STR "_cold"
# else
// Custom test: TEST_LEN (and optionally TEST_LOOPS) come from the build line
#  define TEST_TYPE_STR "_cus"
#  ifndef TEST_LOOPS
#    define TEST_LOOPS 1000
#  endif
# endif
#endif

#define TEST_MEM (2 * TEST_LEN)
|
||||||
|
|
||||||
|
typedef unsigned char u8;
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
u8 *buff1, *buff2, gf_const_tbl[64], a = 2;
|
||||||
|
struct perf start, stop;
|
||||||
|
|
||||||
|
printf("gf_vect_mul_sse_perf:\n");
|
||||||
|
|
||||||
|
gf_vect_mul_init(a, gf_const_tbl);
|
||||||
|
|
||||||
|
// Allocate large mem region
|
||||||
|
buff1 = (u8 *) malloc(TEST_LEN);
|
||||||
|
buff2 = (u8 *) malloc(TEST_LEN);
|
||||||
|
if (NULL == buff1 || NULL == buff2) {
|
||||||
|
printf("Failed to allocate %dB\n", TEST_LEN);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(buff1, 0, TEST_LEN);
|
||||||
|
memset(buff2, 0, TEST_LEN);
|
||||||
|
|
||||||
|
printf("Start timed tests\n");
|
||||||
|
fflush(0);
|
||||||
|
|
||||||
|
gf_vect_mul_sse(TEST_LEN, gf_const_tbl, buff1, buff2);
|
||||||
|
perf_start(&start);
|
||||||
|
for (i = 0; i < TEST_LOOPS; i++) {
|
||||||
|
gf_vect_mul_init(a, gf_const_tbl); // in a re-build would only calc once
|
||||||
|
gf_vect_mul_sse(TEST_LEN, gf_const_tbl, buff1, buff2);
|
||||||
|
}
|
||||||
|
perf_stop(&stop);
|
||||||
|
printf("gf_vect_mul_sse" TEST_TYPE_STR ": ");
|
||||||
|
perf_print(stop, start, (long long)TEST_LEN * i);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
160
erasure/src/gf-vect-mul-sse-test.c
Normal file
160
erasure/src/gf-vect-mul-sse-test.c
Normal file
@ -0,0 +1,160 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "erasure-code.h"
|
||||||
|
|
||||||
|
#define TEST_SIZE (128*1024)

typedef unsigned char u8;

/*
 * Functional test for gf_vect_mul_sse().
 *
 * Phases:
 *   1. Multiply a random buffer by the constant 2 and compare every output
 *      byte with gf_mul().
 *   2. Compare the SSE output with gf_vect_mul_base().
 *   3. Repeat the gf_mul() comparison for every constant 0..255.
 *   4. Shrink the length from TEST_SIZE in 32-byte steps, using a random
 *      constant for each length.
 *   5. Advance the start of the buffers in 32-byte steps so that each run
 *      ends exactly at the end of the allocation (for Electric Fence), and
 *      check against both gf_mul() and gf_vect_mul_base().
 *
 * Returns 0 when every check passes.  Allocation failures and mismatches in
 * phases 1-4 return -1; mismatches in phase 5 return 1.
 */
int main(int argc, char *argv[])
{
	int i, c;
	int ret = -1;		// exit code for phases 1-4; phase 5 switches it to 1
	u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
	int tsize;
	int align, size;
	unsigned char *efence_buff1;
	unsigned char *efence_buff2;
	unsigned char *efence_buff3;

	printf("gf_vect_mul_sse_test: ");

	gf_vect_mul_init(a, gf_const_tbl);

	buff1 = malloc(TEST_SIZE);
	buff2 = malloc(TEST_SIZE);
	buff3 = malloc(TEST_SIZE);

	if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
		printf("buffer alloc error\n");
		goto out;
	}
	// Fill with rand data
	for (i = 0; i < TEST_SIZE; i++)
		buff1[i] = rand();

	gf_vect_mul_sse(TEST_SIZE, gf_const_tbl, buff1, buff2);

	for (i = 0; i < TEST_SIZE; i++) {
		if (gf_mul(a, buff1[i]) != buff2[i]) {
			printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i,
			       buff1[i], buff2[i], gf_mul(2, buff1[i]));
			goto out;
		}
	}

	gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3);

	// Check reference function
	for (i = 0; i < TEST_SIZE; i++) {
		if (buff2[i] != buff3[i]) {
			printf("fail at %d, 0x%x x 0x%x = 0x%x (0x%x)\n",
			       i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
			goto out;
		}
	}

	for (i = 0; i < TEST_SIZE; i++)
		buff1[i] = rand();

	// Check each possible constant.  The counter is an int because a u8
	// counter cannot express the terminating value 256, which is why a
	// u8-based loop would have to stop before testing 255.
	for (c = 0; c < 256; c++) {
		a = (u8) c;
		gf_vect_mul_init(a, gf_const_tbl);
		gf_vect_mul_sse(TEST_SIZE, gf_const_tbl, buff1, buff2);

		for (i = 0; i < TEST_SIZE; i++) {
			if (gf_mul(a, buff1[i]) != buff2[i]) {
				// Report the expected product for the constant under test
				printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
				       i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
				goto out;
			}
		}
		putchar('.');
	}

	// Check buffer len
	for (tsize = TEST_SIZE; tsize > 0; tsize -= 32) {
		a = rand();
		gf_vect_mul_init(a, gf_const_tbl);
		gf_vect_mul_sse(tsize, gf_const_tbl, buff1, buff2);

		for (i = 0; i < tsize; i++) {
			if (gf_mul(a, buff1[i]) != buff2[i]) {
				printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
				       i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
				goto out;
			}
		}

		// Progress dot once every 8 lengths
		if (0 == tsize % (32 * 8)) {
			putchar('.');
			fflush(NULL);
		}
	}

	// Run tests at end of buffer for Electric Fence
	ret = 1;
	align = 32;
	a = 2;

	gf_vect_mul_init(a, gf_const_tbl);
	for (size = 0; size < TEST_SIZE; size += align) {
		// Line up TEST_SIZE from end
		efence_buff1 = buff1 + size;
		efence_buff2 = buff2 + size;
		efence_buff3 = buff3 + size;

		gf_vect_mul_sse(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);

		for (i = 0; i < TEST_SIZE - size; i++) {
			if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
				printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
				       i, efence_buff1[i], efence_buff2[i],
				       gf_mul(2, efence_buff1[i]));
				goto out;
			}
		}

		gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3);

		// Check reference function
		for (i = 0; i < TEST_SIZE - size; i++) {
			if (efence_buff2[i] != efence_buff3[i]) {
				printf("fail at %d, 0x%x x 0x%x = 0x%x (0x%x)\n",
				       i, a, efence_buff2[i], efence_buff3[i],
				       gf_mul(2, efence_buff1[i]));
				goto out;
			}
		}

		putchar('.');
	}

	printf(" done: Pass\n");
	fflush(NULL);
	ret = 0;

out:
	// free(NULL) is a no-op, so this is safe after a partial allocation failure
	free(buff1);
	free(buff2);
	free(buff3);
	return ret;
}
|
178
erasure/src/gf-vect-mul-sse.asm
Normal file
178
erasure/src/gf-vect-mul-sse.asm
Normal file
@ -0,0 +1,178 @@
|
|||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
;
|
||||||
|
; Redistribution and use in source and binary forms, with or without
|
||||||
|
; modification, are permitted provided that the following conditions
|
||||||
|
; are met:
|
||||||
|
; * Redistributions of source code must retain the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer.
|
||||||
|
; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
; notice, this list of conditions and the following disclaimer in
|
||||||
|
; the documentation and/or other materials provided with the
|
||||||
|
; distribution.
|
||||||
|
; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
; contributors may be used to endorse or promote products derived
|
||||||
|
; from this software without specific prior written permission.
|
||||||
|
;
|
||||||
|
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; gf_vect_mul_sse(len, mul_array, src, dest)
|
||||||
|
;;;
|
||||||
|
;;; Author: Gregory Tucker
|
||||||
|
|
||||||
|
|
||||||
|
;;; Calling-convention glue, selected by the assembler output format.
;;; Each branch defines arg0..argN, the return register, func(x) and the
;;; FUNC_SAVE / FUNC_RESTORE prologue/epilogue helpers used by the routine below.
;;; elf64 branch: arguments arrive in rdi, rsi, rdx, rcx, r8, r9 and
;;; FUNC_SAVE / FUNC_RESTORE expand to nothing.
%ifidn __OUTPUT_FORMAT__, elf64
|
||||||
|
%define arg0 rdi
|
||||||
|
%define arg1 rsi
|
||||||
|
%define arg2 rdx
|
||||||
|
%define arg3 rcx
|
||||||
|
%define arg4 r8
|
||||||
|
%define arg5 r9
|
||||||
|
%define tmp r11
|
||||||
|
%define return rax
|
||||||
|
%define func(x) x:
|
||||||
|
%define FUNC_SAVE
|
||||||
|
%define FUNC_RESTORE
|
||||||
|
|
||||||
|
;;; win64 branch: arguments arrive in rcx, rdx, r8, r9. The routine uses
;;; xmm6, xmm7 and xmm13-xmm15, which are non-volatile in the Windows x64
;;; calling convention, so they are spilled to the stack and reloaded.
%elifidn __OUTPUT_FORMAT__, win64
|
||||||
|
%define arg0 rcx
|
||||||
|
%define arg1 rdx
|
||||||
|
%define arg2 r8
|
||||||
|
%define arg3 r9
|
||||||
|
%define return rax
|
||||||
|
;;; Five 16-byte save slots plus 8 bytes; presumably the extra 8 re-aligns rsp
;;; to 16 bytes after the return-address push, as the aligned moves in
;;; FUNC_RESTORE require -- TODO confirm against alloc_stack's definition.
%define stack_size 5*16 + 8 ; must be an odd multiple of 8
|
||||||
|
%define func(x) proc_frame x
|
||||||
|
;;; Prologue: reserve stack_size bytes and save the five xmm registers at
;;; offsets 0, 16, 32, 48 and 64 from rsp.
%macro FUNC_SAVE 0
|
||||||
|
alloc_stack stack_size
|
||||||
|
save_xmm128 xmm6, 0*16
|
||||||
|
save_xmm128 xmm7, 1*16
|
||||||
|
save_xmm128 xmm13, 2*16
|
||||||
|
save_xmm128 xmm14, 3*16
|
||||||
|
save_xmm128 xmm15, 4*16
|
||||||
|
end_prolog
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
;;; Epilogue: reload the registers from the same slots, then release the
;;; stack area. Note that the final add modifies the flags.
%macro FUNC_RESTORE 0
|
||||||
|
movdqa xmm6, [rsp + 0*16]
|
||||||
|
movdqa xmm7, [rsp + 1*16]
|
||||||
|
movdqa xmm13, [rsp + 2*16]
|
||||||
|
movdqa xmm14, [rsp + 3*16]
|
||||||
|
movdqa xmm15, [rsp + 4*16]
|
||||||
|
add rsp, stack_size
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
;;; Symbolic names for the four arguments of gf_vect_mul_sse(len, mul_array, src, dest).
%define len arg0
|
||||||
|
%define mul_array arg1
|
||||||
|
%define src arg2
|
||||||
|
%define dest arg3
|
||||||
|
;;; pos (the running byte offset) lives in the return register, so whatever
;;; is left in pos at ret is the function's return value.
%define pos return
|
||||||
|
|
||||||
|
|
||||||
|
;;; Use non-temporal load/store unless NO_NT_LDST is defined, in which case
;;; plain aligned moves are used instead.
|
||||||
|
%ifdef NO_NT_LDST
|
||||||
|
%define XLDR movdqa
|
||||||
|
%define XSTR movdqa
|
||||||
|
%else
|
||||||
|
%define XLDR movntdqa
|
||||||
|
%define XSTR movntdq
|
||||||
|
%endif
|
||||||
|
|
||||||
|
default rel
|
||||||
|
|
||||||
|
[bits 64]
|
||||||
|
section .text
|
||||||
|
|
||||||
|
;;; Loop-invariant registers: the 0x0f nibble mask and the two 16-entry
;;; lookup tables loaded from mul_array.
%define xmask0f xmm15
|
||||||
|
%define xgft_lo xmm14
|
||||||
|
%define xgft_hi xmm13
|
||||||
|
|
||||||
|
;;; Per-iteration registers: x0/xtmp1* process the first 16 bytes of each
;;; 32-byte step, x1/xtmp2* the second 16 bytes.
%define x0 xmm0
|
||||||
|
%define xtmp1a xmm1
|
||||||
|
%define xtmp1b xmm2
|
||||||
|
%define xtmp1c xmm3
|
||||||
|
%define x1 xmm4
|
||||||
|
%define xtmp2a xmm5
|
||||||
|
%define xtmp2b xmm6
|
||||||
|
%define xtmp2c xmm7
|
||||||
|
|
||||||
|
|
||||||
|
;;; gf_vect_mul_sse(len, mul_array, src, dest)
;;;
;;; For every source byte b, stores to dest the value
;;;   lo_table[b & 0x0f] XOR hi_table[b >> 4]
;;; where lo_table is the 16 bytes at [mul_array] and hi_table the 16 bytes
;;; at [mul_array+16].
;;;
;;; Processes 32 bytes per iteration. The loop is bottom-tested, so one
;;; 32-byte step is always executed before len is examined.
;;; Returns pos - len in the return register: 0 when len is a positive
;;; multiple of 32.
;;; XLDR/XSTR are aligned or non-temporal moves, so src and dest are
;;; presumably expected to be 16-byte aligned -- TODO confirm with callers.
align 16
|
||||||
|
global gf_vect_mul_sse:function
|
||||||
|
func(gf_vect_mul_sse)
|
||||||
|
FUNC_SAVE
|
||||||
|
mov pos, 0
|
||||||
|
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||||
|
movdqu xgft_lo, [mul_array] ;Load array Cx{00}, Cx{01}, Cx{02}, ...
|
||||||
|
movdqu xgft_hi, [mul_array+16] ; " Cx{00}, Cx{10}, Cx{20}, ... , Cx{f0}
|
||||||
|
|
||||||
|
loop32:
|
||||||
|
XLDR x0, [src+pos] ;Get next source vector
|
||||||
|
XLDR x1, [src+pos+16] ;Get next source vector + 16B ahead
|
||||||
|
;; pshufb overwrites its first operand with the lookup result, so the tables
;; are copied into scratch registers on every iteration.
movdqa xtmp1b, xgft_hi ;Reload const array registers
|
||||||
|
movdqa xtmp1c, xgft_lo
|
||||||
|
movdqa xtmp2b, xgft_hi
|
||||||
|
movdqa xtmp2c, xgft_lo
|
||||||
|
movdqa xtmp1a, x0 ;Keep unshifted copy of src
|
||||||
|
movdqa xtmp2a, x1
|
||||||
|
psraw x0, 4 ;Shift to put high nibble into bits 3-0
|
||||||
|
psraw x1, 4
|
||||||
|
pand xtmp1a, xmask0f ;Mask low src nibble in bits 3-0
|
||||||
|
pand xtmp2a, xmask0f
|
||||||
|
pand x0, xmask0f ;Mask high src nibble in bits 3-0 (drops bits shifted in from the neighbouring byte)
|
||||||
|
pand x1, xmask0f
|
||||||
|
pshufb xtmp1b, x0 ;Lookup mul table of high nibble
|
||||||
|
pshufb xtmp1c, xtmp1a ;Lookup mul table of low nibble
|
||||||
|
pshufb xtmp2b, x1
|
||||||
|
pshufb xtmp2c, xtmp2a
|
||||||
|
pxor xtmp1b, xtmp1c ;GF add high and low partials
|
||||||
|
pxor xtmp2b, xtmp2c
|
||||||
|
XSTR [dest+pos], xtmp1b ;Store result
|
||||||
|
XSTR [dest+pos+16], xtmp2b ;Store +16B result
|
||||||
|
add pos, 32 ;Loop on 32 bytes at a time
|
||||||
|
cmp pos, len
|
||||||
|
jl loop32
|
||||||
|
|
||||||
|
|
||||||
|
;; Normal exit: return value is pos - len.
return_pass:
|
||||||
|
sub pos, len
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
;; Returns 1. NOTE(review): no branch to this label is visible in this routine.
return_fail:
|
||||||
|
mov return, 1
|
||||||
|
FUNC_RESTORE
|
||||||
|
ret
|
||||||
|
|
||||||
|
endproc_frame
|
||||||
|
|
||||||
|
section .data
|
||||||
|
|
||||||
|
align 16
|
||||||
|
;;; 16 bytes of 0x0f: selects the low nibble of every byte in a vector.
;;; Loaded with an aligned move, hence the align 16 above.
mask0f:
|
||||||
|
ddq 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
||||||
|
|
||||||
|
;;; slversion name, a, b, c
;;; Exports the labels <name>_slver and <name>_slver_<a><b><c>, both pointing
;;; at four bytes of data: the word 0x<c> followed by the bytes 0x<b>, 0x<a>.
%macro slversion 4
|
||||||
|
global %1_slver_%2%3%4
|
||||||
|
global %1_slver
|
||||||
|
%1_slver:
|
||||||
|
%1_slver_%2%3%4:
|
||||||
|
dw 0x%4
|
||||||
|
db 0x%3, 0x%2
|
||||||
|
%endmacro
|
||||||
|
;;; func, core, ver, snum
|
||||||
|
slversion gf_vect_mul_sse, 00, 02, 0034
|
142
erasure/src/gf-vect-mul-test.c
Normal file
142
erasure/src/gf-vect-mul-test.c
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
/**********************************************************************
|
||||||
|
Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h> // for memset
|
||||||
|
#include "erasure-code.h"
|
||||||
|
|
||||||
|
#define TEST_SIZE 8192
|
||||||
|
#define TEST_MEM TEST_SIZE
|
||||||
|
#define TEST_LOOPS 100000
|
||||||
|
#define TEST_TYPE_STR ""
|
||||||
|
|
||||||
|
typedef unsigned char u8;
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2;
|
||||||
|
int align, size;
|
||||||
|
unsigned char *efence_buff1;
|
||||||
|
unsigned char *efence_buff2;
|
||||||
|
unsigned char *efence_buff3;
|
||||||
|
|
||||||
|
printf("gf_vect_mul_test:\n");
|
||||||
|
|
||||||
|
gf_vect_mul_init(a, gf_const_tbl);
|
||||||
|
|
||||||
|
buff1 = (u8 *) malloc(TEST_SIZE);
|
||||||
|
buff2 = (u8 *) malloc(TEST_SIZE);
|
||||||
|
buff3 = (u8 *) malloc(TEST_SIZE);
|
||||||
|
|
||||||
|
if (NULL == buff1 || NULL == buff2 || NULL == buff3) {
|
||||||
|
printf("buffer alloc error\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// Fill with rand data
|
||||||
|
for (i = 0; i < TEST_SIZE; i++)
|
||||||
|
buff1[i] = rand();
|
||||||
|
|
||||||
|
gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SIZE; i++)
|
||||||
|
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||||
|
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i],
|
||||||
|
gf_mul(2, buff1[i]));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3);
|
||||||
|
|
||||||
|
// Check reference function
|
||||||
|
for (i = 0; i < TEST_SIZE; i++)
|
||||||
|
if (buff2[i] != buff3[i]) {
|
||||||
|
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
|
||||||
|
i, a, buff1[i], buff2[i], gf_mul(a, buff1[i]));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SIZE; i++)
|
||||||
|
buff1[i] = rand();
|
||||||
|
|
||||||
|
// Check each possible constant
|
||||||
|
printf("Random tests ");
|
||||||
|
for (a = 0; a != 255; a++) {
|
||||||
|
gf_vect_mul_init(a, gf_const_tbl);
|
||||||
|
gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SIZE; i++) {
|
||||||
|
if (gf_mul(a, buff1[i]) != buff2[i]) {
|
||||||
|
printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n",
|
||||||
|
i, a, buff1[i], buff2[i], gf_mul(2, buff1[i]));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run tests at end of buffer for Electric Fence
|
||||||
|
align = 32;
|
||||||
|
a = 2;
|
||||||
|
|
||||||
|
gf_vect_mul_init(a, gf_const_tbl);
|
||||||
|
for (size = 0; size < TEST_SIZE; size += align) {
|
||||||
|
// Line up TEST_SIZE from end
|
||||||
|
efence_buff1 = buff1 + size;
|
||||||
|
efence_buff2 = buff2 + size;
|
||||||
|
efence_buff3 = buff3 + size;
|
||||||
|
|
||||||
|
gf_vect_mul(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2);
|
||||||
|
|
||||||
|
for (i = 0; i < TEST_SIZE - size; i++)
|
||||||
|
if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) {
|
||||||
|
printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n",
|
||||||
|
i, efence_buff1[i], efence_buff2[i],
|
||||||
|
gf_mul(2, efence_buff1[i]));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3);
|
||||||
|
|
||||||
|
// Check reference function
|
||||||
|
for (i = 0; i < TEST_SIZE - size; i++)
|
||||||
|
if (efence_buff2[i] != efence_buff3[i]) {
|
||||||
|
printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n",
|
||||||
|
i, a, efence_buff2[i], efence_buff3[i],
|
||||||
|
gf_mul(2, efence_buff1[i]));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
putchar('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
printf(" done: Pass\n");
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user