Blame - third_party/chromium/crypto/p224.cc - weave/libweave

blob: 81bce3a851cd26b7cf98541b8bc259c71f97276d [file] [log] [blame]

Vitaly Buka	9ba72a8	2015-08-06 17:36:17 -0700	[diff] [blame]	1	// Copyright 2012 The Chromium OS Authors. All rights reserved.
Vitaly Buka	6ca6a23	2015-08-06 17:32:43 -0700	[diff] [blame]	2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	// This is an implementation of the P224 elliptic curve group. It's written to
				6	// be short and simple rather than fast, although it's still constant-time.
				7	//
				8	// See http://www.imperialviolet.org/2010/12/04/ecc.html ([1]) for background.
				9
Vitaly Buka	9e5b683	2015-10-14 15:57:14 -0700	[diff] [blame]	10	#include "third_party/chromium/crypto/p224.h"
Vitaly Buka	6ca6a23	2015-08-06 17:32:43 -0700	[diff] [blame]	11
				12	#include <string.h>
				13
Vitaly Buka	9ba72a8	2015-08-06 17:36:17 -0700	[diff] [blame]	14	namespace crypto {
				15	namespace p224 {
Vitaly Buka	6ca6a23	2015-08-06 17:32:43 -0700	[diff] [blame]	16
				17	namespace {
				18
Vitaly Buka	0d50107	2015-08-18 18:09:46 -0700	[diff] [blame]	19	inline uint32 ByteSwap(uint32 x) {
				20	return ((x & 0x000000fful) << 24) \| ((x & 0x0000ff00ul) << 8) \|
				21	((x & 0x00ff0000ul) >> 8) \| ((x & 0xff000000ul) >> 24);
				22	}
				23
				24	inline uint32 HostToNet32(uint32 x) {
				25	#if defined(ARCH_CPU_LITTLE_ENDIAN)
				26	return ByteSwap(x);
				27	#else
				28	return x;
				29	#endif
				30	}
				31
				32	inline uint32 NetToHost32(uint32 x) {
				33	#if defined(ARCH_CPU_LITTLE_ENDIAN)
				34	return ByteSwap(x);
				35	#else
				36	return x;
				37	#endif
				38	}
Vitaly Buka	6ca6a23	2015-08-06 17:32:43 -0700	[diff] [blame]	39
				40	// Field element functions.
				41	//
				42	// The field that we're dealing with is ℤ/pℤ where p = 2**224 - 2**96 + 1.
				43	//
				44	// Field elements are represented by a FieldElement, which is a typedef to an
				45	// array of 8 uint32's. The value of a FieldElement, a, is:
				46	// a[0] + 2**28·a[1] + 2**56·a[2] + ... + 2**196·a[7]
				47	//
				48	// Using 28-bit limbs means that there's only 4 bits of headroom, which is less
				49	// than we would really like. But it has the useful feature that we hit 2**224
				50	// exactly, making the reflections during a reduce much nicer.
				51
Vitaly Buka	6ca6a23	2015-08-06 17:32:43 -0700	[diff] [blame]	52	// kP is the P224 prime.
				53	const FieldElement kP = {
				54	1, 0, 0, 268431360,
				55	268435455, 268435455, 268435455, 268435455,
				56	};
				57
				58	void Contract(FieldElement* inout);
				59
				60	// IsZero returns 0xffffffff if a == 0 mod p and 0 otherwise.
				61	uint32 IsZero(const FieldElement& a) {
				62	FieldElement minimal;
				63	memcpy(&minimal, &a, sizeof(minimal));
				64	Contract(&minimal);
				65
				66	uint32 is_zero = 0, is_p = 0;
				67	for (unsigned i = 0; i < 8; i++) {
				68	is_zero \|= minimal[i];
				69	is_p \|= minimal[i] - kP[i];
				70	}
				71
				72	// If either is_zero or is_p is 0, then we should return 1.
				73	is_zero \|= is_zero >> 16;
				74	is_zero \|= is_zero >> 8;
				75	is_zero \|= is_zero >> 4;
				76	is_zero \|= is_zero >> 2;
				77	is_zero \|= is_zero >> 1;
				78
				79	is_p \|= is_p >> 16;
				80	is_p \|= is_p >> 8;
				81	is_p \|= is_p >> 4;
				82	is_p \|= is_p >> 2;
				83	is_p \|= is_p >> 1;
				84
				85	// For is_zero and is_p, the LSB is 0 iff all the bits are zero.
				86	is_zero &= is_p & 1;
				87	is_zero = (~is_zero) << 31;
				88	is_zero = static_cast<int32>(is_zero) >> 31;
				89	return is_zero;
				90	}
				91
				92	// Add computes *out = a+b
				93	//
				94	// a[i] + b[i] < 2**32
				95	void Add(FieldElement* out, const FieldElement& a, const FieldElement& b) {
				96	for (int i = 0; i < 8; i++) {
				97	(*out)[i] = a[i] + b[i];
				98	}
				99	}
				100
Vitaly Buka	9ba72a8	2015-08-06 17:36:17 -0700	[diff] [blame]	101	static const uint32 kTwo31p3 = (1u << 31) + (1u << 3);
				102	static const uint32 kTwo31m3 = (1u << 31) - (1u << 3);
				103	static const uint32 kTwo31m15m3 = (1u << 31) - (1u << 15) - (1u << 3);
Vitaly Buka	6ca6a23	2015-08-06 17:32:43 -0700	[diff] [blame]	104	// kZero31ModP is 0 mod p where bit 31 is set in all limbs so that we can
				105	// subtract smaller amounts without underflow. See the section "Subtraction" in
				106	// [1] for why.
				107	static const FieldElement kZero31ModP = {
				108	kTwo31p3, kTwo31m3, kTwo31m3, kTwo31m15m3,
				109	kTwo31m3, kTwo31m3, kTwo31m3, kTwo31m3
				110	};
				111
				112	// Subtract computes *out = a-b
				113	//
				114	// a[i], b[i] < 2**30
				115	// out[i] < 2**32
				116	void Subtract(FieldElement* out, const FieldElement& a, const FieldElement& b) {
				117	for (int i = 0; i < 8; i++) {
				118	// See the section on "Subtraction" in [1] for details.
				119	(*out)[i] = a[i] + kZero31ModP[i] - b[i];
				120	}
				121	}
				122
				123	static const uint64 kTwo63p35 = (1ull<<63) + (1ull<<35);
				124	static const uint64 kTwo63m35 = (1ull<<63) - (1ull<<35);
				125	static const uint64 kTwo63m35m19 = (1ull<<63) - (1ull<<35) - (1ull<<19);
				126	// kZero63ModP is 0 mod p where bit 63 is set in all limbs. See the section
				127	// "Subtraction" in [1] for why.
				128	static const uint64 kZero63ModP[8] = {
				129	kTwo63p35, kTwo63m35, kTwo63m35, kTwo63m35,
				130	kTwo63m35m19, kTwo63m35, kTwo63m35, kTwo63m35,
				131	};
				132
				133	static const uint32 kBottom28Bits = 0xfffffff;
				134
				135	// LargeFieldElement also represents an element of the field. The limbs are
				136	// still spaced 28-bits apart and in little-endian order. So the limbs are at
				137	// 0, 28, 56, ..., 392 bits, each 64-bits wide.
				138	typedef uint64 LargeFieldElement[15];
				139
				140	// ReduceLarge converts a LargeFieldElement to a FieldElement.
				141	//
				142	// in[i] < 2**62
				143
				144	// GCC 4.9 incorrectly vectorizes the first coefficient elimination loop, so
				145	// disable that optimization via pragma. Don't use the pragma under Clang, since
				146	// clang doesn't understand it.
				147	// TODO(wez): Remove this when crbug.com/439566 is fixed.
				148	#if defined(__GNUC__) && !defined(__clang__)
				149	#pragma GCC optimize("no-tree-vectorize")
				150	#endif
				151
				152	void ReduceLarge(FieldElement* out, LargeFieldElement* inptr) {
				153	LargeFieldElement& in(*inptr);
				154
				155	for (int i = 0; i < 8; i++) {
				156	in[i] += kZero63ModP[i];
				157	}
				158
				159	// Eliminate the coefficients at 2**224 and greater while maintaining the
				160	// same value mod p.
				161	for (int i = 14; i >= 8; i--) {
				162	in[i-8] -= in[i]; // reflection off the "+1" term of p.
				163	in[i-5] += (in[i] & 0xffff) << 12; // part of the "-2**96" reflection.
				164	in[i-4] += in[i] >> 16; // the rest of the "-2**96" reflection.
				165	}
				166	in[8] = 0;
				167	// in[0..8] < 2**64
				168
				169	// As the values become small enough, we start to store them in \|out\| and use
				170	// 32-bit operations.
				171	for (int i = 1; i < 8; i++) {
				172	in[i+1] += in[i] >> 28;
				173	(*out)[i] = static_cast<uint32>(in[i] & kBottom28Bits);
				174	}
				175	// Eliminate the term at 2*224 that we introduced while keeping the same
				176	// value mod p.
				177	in[0] -= in[8]; // reflection off the "+1" term of p.
				178	(out)[3] += static_cast<uint32>(in[8] & 0xffff) << 12; // "-2*96" term
				179	(out)[4] += static_cast<uint32>(in[8] >> 16); // rest of "-2*96" term
				180	// in[0] < 2**64
				181	// out[3] < 2**29
				182	// out[4] < 2**29
				183	// out[1,2,5..7] < 2**28
				184
				185	(*out)[0] = static_cast<uint32>(in[0] & kBottom28Bits);
				186	(*out)[1] += static_cast<uint32>((in[0] >> 28) & kBottom28Bits);
				187	(*out)[2] += static_cast<uint32>(in[0] >> 56);
				188	// out[0] < 2**28
				189	// out[1..4] < 2**29
				190	// out[5..7] < 2**28
				191	}
				192
				193	// TODO(wez): Remove this when crbug.com/439566 is fixed.
				194	#if defined(__GNUC__) && !defined(__clang__)
				195	// Reenable "tree-vectorize" optimization if it got disabled for ReduceLarge.
				196	#pragma GCC reset_options
				197	#endif
				198
				199	// Mul computes out = ab
				200	//
				201	// a[i] < 229, b[i] < 230 (or vice versa)
				202	// out[i] < 2**29
				203	void Mul(FieldElement* out, const FieldElement& a, const FieldElement& b) {
				204	LargeFieldElement tmp;
				205	memset(&tmp, 0, sizeof(tmp));
				206
				207	for (int i = 0; i < 8; i++) {
				208	for (int j = 0; j < 8; j++) {
				209	tmp[i+j] += static_cast<uint64>(a[i]) * static_cast<uint64>(b[j]);
				210	}
				211	}
				212
				213	ReduceLarge(out, &tmp);
				214	}
				215
				216	// Square computes out = aa
				217	//
				218	// a[i] < 2**29
				219	// out[i] < 2**29
				220	void Square(FieldElement* out, const FieldElement& a) {
				221	LargeFieldElement tmp;
				222	memset(&tmp, 0, sizeof(tmp));
				223
				224	for (int i = 0; i < 8; i++) {
				225	for (int j = 0; j <= i; j++) {
				226	uint64 r = static_cast<uint64>(a[i]) * static_cast<uint64>(a[j]);
				227	if (i == j) {
				228	tmp[i+j] += r;
				229	} else {
				230	tmp[i+j] += r << 1;
				231	}
				232	}
				233	}
				234
				235	ReduceLarge(out, &tmp);
				236	}
				237
				238	// Reduce reduces the coefficients of in_out to smaller bounds.
				239	//
				240	// On entry: a[i] < 231 + 230
				241	// On exit: a[i] < 2**29
				242	void Reduce(FieldElement* in_out) {
				243	FieldElement& a = *in_out;
				244
				245	for (int i = 0; i < 7; i++) {
				246	a[i+1] += a[i] >> 28;
				247	a[i] &= kBottom28Bits;
				248	}
				249	uint32 top = a[7] >> 28;
				250	a[7] &= kBottom28Bits;
				251
				252	// top < 2**4
				253	// Constant-time: mask = (top != 0) ? 0xffffffff : 0
				254	uint32 mask = top;
				255	mask \|= mask >> 2;
				256	mask \|= mask >> 1;
				257	mask <<= 31;
				258	mask = static_cast<uint32>(static_cast<int32>(mask) >> 31);
				259
				260	// Eliminate top while maintaining the same value mod p.
				261	a[0] -= top;
				262	a[3] += top << 12;
				263
				264	// We may have just made a[0] negative but, if we did, then we must
				265	// have added something to a[3], thus it's > 2**12. Therefore we can
				266	// carry down to a[0].
				267	a[3] -= 1 & mask;
				268	a[2] += mask & ((1<<28) - 1);
				269	a[1] += mask & ((1<<28) - 1);
				270	a[0] += mask & (1<<28);
				271	}
				272
				273	// Invert calcuates out = in-1 by computing in(2224 - 2*96 - 1), i.e.
				274	// Fermat's little theorem.
				275	void Invert(FieldElement* out, const FieldElement& in) {
				276	FieldElement f1, f2, f3, f4;
				277
				278	Square(&f1, in); // 2
				279	Mul(&f1, f1, in); // 2**2 - 1
				280	Square(&f1, f1); // 2**3 - 2
				281	Mul(&f1, f1, in); // 2**3 - 1
				282	Square(&f2, f1); // 2**4 - 2
				283	Square(&f2, f2); // 2**5 - 4
				284	Square(&f2, f2); // 2**6 - 8
				285	Mul(&f1, f1, f2); // 2**6 - 1
				286	Square(&f2, f1); // 2**7 - 2
				287	for (int i = 0; i < 5; i++) { // 212 - 26
				288	Square(&f2, f2);
				289	}
				290	Mul(&f2, f2, f1); // 2**12 - 1
				291	Square(&f3, f2); // 2**13 - 2
				292	for (int i = 0; i < 11; i++) { // 224 - 212
				293	Square(&f3, f3);
				294	}
				295	Mul(&f2, f3, f2); // 2**24 - 1
				296	Square(&f3, f2); // 2**25 - 2
				297	for (int i = 0; i < 23; i++) { // 248 - 224
				298	Square(&f3, f3);
				299	}
				300	Mul(&f3, f3, f2); // 2**48 - 1
				301	Square(&f4, f3); // 2**49 - 2
				302	for (int i = 0; i < 47; i++) { // 296 - 248
				303	Square(&f4, f4);
				304	}
				305	Mul(&f3, f3, f4); // 2**96 - 1
				306	Square(&f4, f3); // 2**97 - 2
				307	for (int i = 0; i < 23; i++) { // 2120 - 224
				308	Square(&f4, f4);
				309	}
				310	Mul(&f2, f4, f2); // 2**120 - 1
				311	for (int i = 0; i < 6; i++) { // 2126 - 26
				312	Square(&f2, f2);
				313	}
				314	Mul(&f1, f1, f2); // 2**126 - 1
				315	Square(&f1, f1); // 2**127 - 2
				316	Mul(&f1, f1, in); // 2**127 - 1
				317	for (int i = 0; i < 97; i++) { // 2224 - 297
				318	Square(&f1, f1);
				319	}
				320	Mul(out, f1, f3); // 2224 - 296 - 1
				321	}
				322
				323	// Contract converts a FieldElement to its minimal, distinguished form.
				324	//
				325	// On entry, in[i] < 2**29
				326	// On exit, in[i] < 2**28
				327	void Contract(FieldElement* inout) {
				328	FieldElement& out = *inout;
				329
				330	// Reduce the coefficients to < 2**28.
				331	for (int i = 0; i < 7; i++) {
				332	out[i+1] += out[i] >> 28;
				333	out[i] &= kBottom28Bits;
				334	}
				335	uint32 top = out[7] >> 28;
				336	out[7] &= kBottom28Bits;
				337
				338	// Eliminate top while maintaining the same value mod p.
				339	out[0] -= top;
				340	out[3] += top << 12;
				341
				342	// We may just have made out[0] negative. So we carry down. If we made
				343	// out[0] negative then we know that out[3] is sufficiently positive
				344	// because we just added to it.
				345	for (int i = 0; i < 3; i++) {
				346	uint32 mask = static_cast<uint32>(static_cast<int32>(out[i]) >> 31);
				347	out[i] += (1 << 28) & mask;
				348	out[i+1] -= 1 & mask;
				349	}
				350
				351	// We might have pushed out[3] over 2**28 so we perform another, partial
				352	// carry chain.
				353	for (int i = 3; i < 7; i++) {
				354	out[i+1] += out[i] >> 28;
				355	out[i] &= kBottom28Bits;
				356	}
				357	top = out[7] >> 28;
				358	out[7] &= kBottom28Bits;
				359
				360	// Eliminate top while maintaining the same value mod p.
				361	out[0] -= top;
				362	out[3] += top << 12;
				363
				364	// There are two cases to consider for out[3]:
				365	// 1) The first time that we eliminated top, we didn't push out[3] over
				366	// 2**28. In this case, the partial carry chain didn't change any values
				367	// and top is zero.
				368	// 2) We did push out[3] over 2**28 the first time that we eliminated top.
				369	// The first value of top was in [0..16), therefore, prior to eliminating
				370	// the first top, 0xfff1000 <= out[3] <= 0xfffffff. Therefore, after
				371	// overflowing and being reduced by the second carry chain, out[3] <=
				372	// 0xf000. Thus it cannot have overflowed when we eliminated top for the
				373	// second time.
				374
				375	// Again, we may just have made out[0] negative, so do the same carry down.
				376	// As before, if we made out[0] negative then we know that out[3] is
				377	// sufficiently positive.
				378	for (int i = 0; i < 3; i++) {
				379	uint32 mask = static_cast<uint32>(static_cast<int32>(out[i]) >> 31);
				380	out[i] += (1 << 28) & mask;
				381	out[i+1] -= 1 & mask;
				382	}
				383
				384	// The value is < 2**224, but maybe greater than p. In order to reduce to a
				385	// unique, minimal value we see if the value is >= p and, if so, subtract p.
				386
				387	// First we build a mask from the top four limbs, which must all be
				388	// equal to bottom28Bits if the whole value is >= p. If top_4_all_ones
				389	// ends up with any zero bits in the bottom 28 bits, then this wasn't
				390	// true.
				391	uint32 top_4_all_ones = 0xffffffffu;
				392	for (int i = 4; i < 8; i++) {
				393	top_4_all_ones &= out[i];
				394	}
				395	top_4_all_ones \|= 0xf0000000;
				396	// Now we replicate any zero bits to all the bits in top_4_all_ones.
				397	top_4_all_ones &= top_4_all_ones >> 16;
				398	top_4_all_ones &= top_4_all_ones >> 8;
				399	top_4_all_ones &= top_4_all_ones >> 4;
				400	top_4_all_ones &= top_4_all_ones >> 2;
				401	top_4_all_ones &= top_4_all_ones >> 1;
				402	top_4_all_ones =
				403	static_cast<uint32>(static_cast<int32>(top_4_all_ones << 31) >> 31);
				404
				405	// Now we test whether the bottom three limbs are non-zero.
				406	uint32 bottom_3_non_zero = out[0] \| out[1] \| out[2];
				407	bottom_3_non_zero \|= bottom_3_non_zero >> 16;
				408	bottom_3_non_zero \|= bottom_3_non_zero >> 8;
				409	bottom_3_non_zero \|= bottom_3_non_zero >> 4;
				410	bottom_3_non_zero \|= bottom_3_non_zero >> 2;
				411	bottom_3_non_zero \|= bottom_3_non_zero >> 1;
				412	bottom_3_non_zero =
				413	static_cast<uint32>(static_cast<int32>(bottom_3_non_zero) >> 31);
				414
				415	// Everything depends on the value of out[3].
				416	// If it's > 0xffff000 and top_4_all_ones != 0 then the whole value is >= p
				417	// If it's = 0xffff000 and top_4_all_ones != 0 and bottom_3_non_zero != 0,
				418	// then the whole value is >= p
				419	// If it's < 0xffff000, then the whole value is < p
				420	uint32 n = out[3] - 0xffff000;
				421	uint32 out_3_equal = n;
				422	out_3_equal \|= out_3_equal >> 16;
				423	out_3_equal \|= out_3_equal >> 8;
				424	out_3_equal \|= out_3_equal >> 4;
				425	out_3_equal \|= out_3_equal >> 2;
				426	out_3_equal \|= out_3_equal >> 1;
				427	out_3_equal =
				428	~static_cast<uint32>(static_cast<int32>(out_3_equal << 31) >> 31);
				429
				430	// If out[3] > 0xffff000 then n's MSB will be zero.
				431	uint32 out_3_gt = ~static_cast<uint32>(static_cast<int32>(n << 31) >> 31);
				432
				433	uint32 mask = top_4_all_ones & ((out_3_equal & bottom_3_non_zero) \| out_3_gt);
				434	out[0] -= 1 & mask;
				435	out[3] -= 0xffff000 & mask;
				436	out[4] -= 0xfffffff & mask;
				437	out[5] -= 0xfffffff & mask;
				438	out[6] -= 0xfffffff & mask;
				439	out[7] -= 0xfffffff & mask;
				440	}
				441
				442
				443	// Group element functions.
				444	//
				445	// These functions deal with group elements. The group is an elliptic curve
				446	// group with a = -3 defined in FIPS 186-3, section D.2.2.
				447
Vitaly Buka	6ca6a23	2015-08-06 17:32:43 -0700	[diff] [blame]	448	// kB is parameter of the elliptic curve.
				449	const FieldElement kB = {
				450	55967668, 11768882, 265861671, 185302395,
				451	39211076, 180311059, 84673715, 188764328,
				452	};
				453
				454	void CopyConditional(Point* out, const Point& a, uint32 mask);
				455	void DoubleJacobian(Point* out, const Point& a);
				456
				457	// AddJacobian computes *out = a+b where a != b.
				458	void AddJacobian(Point *out,
				459	const Point& a,
				460	const Point& b) {
				461	// See http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl
				462	FieldElement z1z1, z2z2, u1, u2, s1, s2, h, i, j, r, v;
				463
				464	uint32 z1_is_zero = IsZero(a.z);
				465	uint32 z2_is_zero = IsZero(b.z);
				466
				467	// Z1Z1 = Z1²
				468	Square(&z1z1, a.z);
				469
				470	// Z2Z2 = Z2²
				471	Square(&z2z2, b.z);
				472
				473	// U1 = X1*Z2Z2
				474	Mul(&u1, a.x, z2z2);
				475
				476	// U2 = X2*Z1Z1
				477	Mul(&u2, b.x, z1z1);
				478
				479	// S1 = Y1Z2Z2Z2
				480	Mul(&s1, b.z, z2z2);
				481	Mul(&s1, a.y, s1);
				482
				483	// S2 = Y2Z1Z1Z1
				484	Mul(&s2, a.z, z1z1);
				485	Mul(&s2, b.y, s2);
				486
				487	// H = U2-U1
				488	Subtract(&h, u2, u1);
				489	Reduce(&h);
				490	uint32 x_equal = IsZero(h);
				491
				492	// I = (2*H)²
				493	for (int k = 0; k < 8; k++) {
				494	i[k] = h[k] << 1;
				495	}
				496	Reduce(&i);
				497	Square(&i, i);
				498
				499	// J = H*I
				500	Mul(&j, h, i);
				501	// r = 2*(S2-S1)
				502	Subtract(&r, s2, s1);
				503	Reduce(&r);
				504	uint32 y_equal = IsZero(r);
				505
				506	if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
				507	// The two input points are the same therefore we must use the dedicated
				508	// doubling function as the slope of the line is undefined.
				509	DoubleJacobian(out, a);
				510	return;
				511	}
				512
				513	for (int k = 0; k < 8; k++) {
				514	r[k] <<= 1;
				515	}
				516	Reduce(&r);
				517
				518	// V = U1*I
				519	Mul(&v, u1, i);
				520
				521	// Z3 = ((Z1+Z2)²-Z1Z1-Z2Z2)*H
				522	Add(&z1z1, z1z1, z2z2);
				523	Add(&z2z2, a.z, b.z);
				524	Reduce(&z2z2);
				525	Square(&z2z2, z2z2);
				526	Subtract(&out->z, z2z2, z1z1);
				527	Reduce(&out->z);
				528	Mul(&out->z, out->z, h);
				529
				530	// X3 = r²-J-2*V
				531	for (int k = 0; k < 8; k++) {
				532	z1z1[k] = v[k] << 1;
				533	}
				534	Add(&z1z1, j, z1z1);
				535	Reduce(&z1z1);
				536	Square(&out->x, r);
				537	Subtract(&out->x, out->x, z1z1);
				538	Reduce(&out->x);
				539
				540	// Y3 = r(V-X3)-2S1*J
				541	for (int k = 0; k < 8; k++) {
				542	s1[k] <<= 1;
				543	}
				544	Mul(&s1, s1, j);
				545	Subtract(&z1z1, v, out->x);
				546	Reduce(&z1z1);
				547	Mul(&z1z1, z1z1, r);
				548	Subtract(&out->y, z1z1, s1);
				549	Reduce(&out->y);
				550
				551	CopyConditional(out, a, z2_is_zero);
				552	CopyConditional(out, b, z1_is_zero);
				553	}
				554
				555	// DoubleJacobian computes *out = a+a.
				556	void DoubleJacobian(Point* out, const Point& a) {
				557	// See http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
				558	FieldElement delta, gamma, beta, alpha, t;
				559
				560	Square(&delta, a.z);
				561	Square(&gamma, a.y);
				562	Mul(&beta, a.x, gamma);
				563
				564	// alpha = 3(X1-delta)(X1+delta)
				565	Add(&t, a.x, delta);
				566	for (int i = 0; i < 8; i++) {
				567	t[i] += t[i] << 1;
				568	}
				569	Reduce(&t);
				570	Subtract(&alpha, a.x, delta);
				571	Reduce(&alpha);
				572	Mul(&alpha, alpha, t);
				573
				574	// Z3 = (Y1+Z1)²-gamma-delta
				575	Add(&out->z, a.y, a.z);
				576	Reduce(&out->z);
				577	Square(&out->z, out->z);
				578	Subtract(&out->z, out->z, gamma);
				579	Reduce(&out->z);
				580	Subtract(&out->z, out->z, delta);
				581	Reduce(&out->z);
				582
				583	// X3 = alpha²-8*beta
				584	for (int i = 0; i < 8; i++) {
				585	delta[i] = beta[i] << 3;
				586	}
				587	Reduce(&delta);
				588	Square(&out->x, alpha);
				589	Subtract(&out->x, out->x, delta);
				590	Reduce(&out->x);
				591
				592	// Y3 = alpha(4beta-X3)-8*gamma²
				593	for (int i = 0; i < 8; i++) {
				594	beta[i] <<= 2;
				595	}
				596	Reduce(&beta);
				597	Subtract(&beta, beta, out->x);
				598	Reduce(&beta);
				599	Square(&gamma, gamma);
				600	for (int i = 0; i < 8; i++) {
				601	gamma[i] <<= 3;
				602	}
				603	Reduce(&gamma);
				604	Mul(&out->y, alpha, beta);
				605	Subtract(&out->y, out->y, gamma);
				606	Reduce(&out->y);
				607	}
				608
				609	// CopyConditional sets *out=a if mask is 0xffffffff. mask must be either 0 of
				610	// 0xffffffff.
				611	void CopyConditional(Point* out,
				612	const Point& a,
				613	uint32 mask) {
				614	for (int i = 0; i < 8; i++) {
				615	out->x[i] ^= mask & (a.x[i] ^ out->x[i]);
				616	out->y[i] ^= mask & (a.y[i] ^ out->y[i]);
				617	out->z[i] ^= mask & (a.z[i] ^ out->z[i]);
				618	}
				619	}
				620
				621	// ScalarMult calculates out = ascalar where scalar is a big-endian number of
				622	// length scalar_len and != 0.
				623	void ScalarMult(Point* out, const Point& a,
				624	const uint8* scalar, size_t scalar_len) {
				625	memset(out, 0, sizeof(*out));
				626	Point tmp;
				627
				628	for (size_t i = 0; i < scalar_len; i++) {
				629	for (unsigned int bit_num = 0; bit_num < 8; bit_num++) {
				630	DoubleJacobian(out, *out);
				631	uint32 bit = static_cast<uint32>(static_cast<int32>(
				632	(((scalar[i] >> (7 - bit_num)) & 1) << 31) >> 31));
				633	AddJacobian(&tmp, a, *out);
				634	CopyConditional(out, tmp, bit);
				635	}
				636	}
				637	}
				638
				639	// Get224Bits reads 7 words from in and scatters their contents in
				640	// little-endian form into 8 words at out, 28 bits per output word.
				641	void Get224Bits(uint32* out, const uint32* in) {
				642	out[0] = NetToHost32(in[6]) & kBottom28Bits;
				643	out[1] = ((NetToHost32(in[5]) << 4) \|
				644	(NetToHost32(in[6]) >> 28)) & kBottom28Bits;
				645	out[2] = ((NetToHost32(in[4]) << 8) \|
				646	(NetToHost32(in[5]) >> 24)) & kBottom28Bits;
				647	out[3] = ((NetToHost32(in[3]) << 12) \|
				648	(NetToHost32(in[4]) >> 20)) & kBottom28Bits;
				649	out[4] = ((NetToHost32(in[2]) << 16) \|
				650	(NetToHost32(in[3]) >> 16)) & kBottom28Bits;
				651	out[5] = ((NetToHost32(in[1]) << 20) \|
				652	(NetToHost32(in[2]) >> 12)) & kBottom28Bits;
				653	out[6] = ((NetToHost32(in[0]) << 24) \|
				654	(NetToHost32(in[1]) >> 8)) & kBottom28Bits;
				655	out[7] = (NetToHost32(in[0]) >> 4) & kBottom28Bits;
				656	}
				657
				658	// Put224Bits performs the inverse operation to Get224Bits: taking 28 bits from
				659	// each of 8 input words and writing them in big-endian order to 7 words at
				660	// out.
				661	void Put224Bits(uint32* out, const uint32* in) {
				662	out[6] = HostToNet32((in[0] >> 0) \| (in[1] << 28));
				663	out[5] = HostToNet32((in[1] >> 4) \| (in[2] << 24));
				664	out[4] = HostToNet32((in[2] >> 8) \| (in[3] << 20));
				665	out[3] = HostToNet32((in[3] >> 12) \| (in[4] << 16));
				666	out[2] = HostToNet32((in[4] >> 16) \| (in[5] << 12));
				667	out[1] = HostToNet32((in[5] >> 20) \| (in[6] << 8));
				668	out[0] = HostToNet32((in[6] >> 24) \| (in[7] << 4));
				669	}
				670
				671	} // anonymous namespace
				672
Vitaly Buka	0d50107	2015-08-18 18:09:46 -0700	[diff] [blame]	673	bool Point::SetFromString(const std::string& in) {
Vitaly Buka	6ca6a23	2015-08-06 17:32:43 -0700	[diff] [blame]	674	if (in.size() != 2*28)
				675	return false;
				676	const uint32* inwords = reinterpret_cast<const uint32*>(in.data());
				677	Get224Bits(x, inwords);
				678	Get224Bits(y, inwords + 7);
				679	memset(&z, 0, sizeof(z));
				680	z[0] = 1;
				681
				682	// Check that the point is on the curve, i.e. that y² = x³ - 3x + b.
				683	FieldElement lhs;
				684	Square(&lhs, y);
				685	Contract(&lhs);
				686
				687	FieldElement rhs;
				688	Square(&rhs, x);
				689	Mul(&rhs, x, rhs);
				690
				691	FieldElement three_x;
				692	for (int i = 0; i < 8; i++) {
				693	three_x[i] = x[i] * 3;
				694	}
				695	Reduce(&three_x);
				696	Subtract(&rhs, rhs, three_x);
				697	Reduce(&rhs);
				698
Vitaly Buka	9ba72a8	2015-08-06 17:36:17 -0700	[diff] [blame]	699	Add(&rhs, rhs, kB);
Vitaly Buka	6ca6a23	2015-08-06 17:32:43 -0700	[diff] [blame]	700	Contract(&rhs);
				701	return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
				702	}
				703
				704	std::string Point::ToString() const {
				705	FieldElement zinv, zinv_sq, xx, yy;
				706
				707	// If this is the point at infinity we return a string of all zeros.
				708	if (IsZero(this->z)) {
				709	static const char zeros[56] = {0};
				710	return std::string(zeros, sizeof(zeros));
				711	}
				712
				713	Invert(&zinv, this->z);
				714	Square(&zinv_sq, zinv);
				715	Mul(&xx, x, zinv_sq);
				716	Mul(&zinv_sq, zinv_sq, zinv);
				717	Mul(&yy, y, zinv_sq);
				718
				719	Contract(&xx);
				720	Contract(&yy);
				721
				722	uint32 outwords[14];
				723	Put224Bits(outwords, xx);
				724	Put224Bits(outwords + 7, yy);
				725	return std::string(reinterpret_cast<const char*>(outwords), sizeof(outwords));
				726	}
				727
				728	void ScalarMult(const Point& in, const uint8* scalar, Point* out) {
Vitaly Buka	9ba72a8	2015-08-06 17:36:17 -0700	[diff] [blame]	729	ScalarMult(out, in, scalar, 28);
Vitaly Buka	6ca6a23	2015-08-06 17:32:43 -0700	[diff] [blame]	730	}
				731
				732	// kBasePoint is the base point (generator) of the elliptic curve group.
				733	static const Point kBasePoint = {
				734	{22813985, 52956513, 34677300, 203240812,
Vitaly Buka	9ba72a8	2015-08-06 17:36:17 -0700	[diff] [blame]	735	12143107, 133374265, 225162431, 191946955},
Vitaly Buka	6ca6a23	2015-08-06 17:32:43 -0700	[diff] [blame]	736	{83918388, 223877528, 122119236, 123340192,
Vitaly Buka	9ba72a8	2015-08-06 17:36:17 -0700	[diff] [blame]	737	266784067, 263504429, 146143011, 198407736},
Vitaly Buka	6ca6a23	2015-08-06 17:32:43 -0700	[diff] [blame]	738	{1, 0, 0, 0, 0, 0, 0, 0},
				739	};
				740
				741	void ScalarBaseMult(const uint8* scalar, Point* out) {
Vitaly Buka	9ba72a8	2015-08-06 17:36:17 -0700	[diff] [blame]	742	ScalarMult(out, kBasePoint, scalar, 28);
Vitaly Buka	6ca6a23	2015-08-06 17:32:43 -0700	[diff] [blame]	743	}
				744
				745	void Add(const Point& a, const Point& b, Point* out) {
				746	AddJacobian(out, a, b);
				747	}
				748
				749	void Negate(const Point& in, Point* out) {
				750	// Guide to elliptic curve cryptography, page 89 suggests that (X : X+Y : Z)
				751	// is the negative in Jacobian coordinates, but it doesn't actually appear to
				752	// be true in testing so this performs the negation in affine coordinates.
				753	FieldElement zinv, zinv_sq, y;
				754	Invert(&zinv, in.z);
				755	Square(&zinv_sq, zinv);
				756	Mul(&out->x, in.x, zinv_sq);
				757	Mul(&zinv_sq, zinv_sq, zinv);
				758	Mul(&y, in.y, zinv_sq);
				759
				760	Subtract(&out->y, kP, y);
				761	Reduce(&out->y);
				762
				763	memset(&out->z, 0, sizeof(out->z));
				764	out->z[0] = 1;
				765	}
				766
				767	} // namespace p224
Vitaly Buka	6ca6a23	2015-08-06 17:32:43 -0700	[diff] [blame]	768	} // namespace crypto