From 7f539b6033a07550000d33e8d6e4f8bf0fbf96cc Mon Sep 17 00:00:00 2001
From: degasus <wickmarkus@web.de>
Date: Sun, 16 Feb 2014 15:17:21 +0100
Subject: [PATCH 1/9] ogl: optimize real xfb a bit

Neither nvidia nor mesa seems to optimize x / (2**n) to x >> n, so we do it ourselves.
---
 Source/Core/VideoBackends/OGL/TextureConverter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/TextureConverter.cpp
index 753bd64ada..dbbb605f6b 100644
--- a/Source/Core/VideoBackends/OGL/TextureConverter.cpp
+++ b/Source/Core/VideoBackends/OGL/TextureConverter.cpp
@@ -117,7 +117,7 @@ void CreatePrograms()
 		"	ivec2 uv = ivec2(gl_FragCoord.xy);\n"
 			// We switch top/bottom here. TODO: move this to screen blit.
 		"	ivec2 ts = textureSize(samp9, 0);\n"
-		"	vec4 c0 = texelFetch(samp9, ivec2(uv.x/2, ts.y-uv.y-1), 0);\n"
+		"	vec4 c0 = texelFetch(samp9, ivec2(uv.x>>1, ts.y-uv.y-1), 0);\n"
 		"	float y = mix(c0.b, c0.r, (uv.x & 1) == 1);\n"
 		"	float yComp = 1.164 * (y - 0.0625);\n"
 		"	float uComp = c0.g - 0.5;\n"

From f99c8a0b70c604565731d2c7c000629806d755b4 Mon Sep 17 00:00:00 2001
From: degasus <wickmarkus@web.de>
Date: Sun, 16 Feb 2014 19:13:48 +0100
Subject: [PATCH 2/9] merge common parts of encoding shaders

---
 Source/Core/VideoCommon/TextureConversionShader.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp
index a7db33418f..6b9e1af370 100644
--- a/Source/Core/VideoCommon/TextureConversionShader.cpp
+++ b/Source/Core/VideoCommon/TextureConversionShader.cpp
@@ -105,6 +105,7 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
 
 	WRITE(p, "  sampleUv.x = xib + halfxb * %d;\n", blkW);
 	WRITE(p, "  sampleUv.y = yb + xoff;\n");
+	WRITE(p, "  bool first = xb == (halfxb * 2);\n");
 }
 
 void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, API_TYPE ApiType)
@@ -373,8 +374,6 @@ void WriteRGBA8Encoder(char* p,API_TYPE ApiType)
 {
 	WriteSwizzler(p, GX_TF_RGBA8, ApiType);
 
-	WRITE(p, "  bool first = xb == (halfxb * 2);\n");
-
 	WRITE(p, "  float4 texSample;\n");
 	WRITE(p, "  float4 color0;\n");
 	WRITE(p, "  float4 color1;\n");
@@ -563,8 +562,6 @@ void WriteZ24Encoder(char* p, API_TYPE ApiType)
 {
 	WriteSwizzler(p, GX_TF_Z24X8, ApiType);
 
-	WRITE(p, "  bool first = xb == (halfxb * 2);\n");
-
 	WRITE(p, "  float depth0;\n");
 	WRITE(p, "  float depth1;\n");
 	WRITE(p, "  float3 expanded0;\n");

From 94da4e1aa29a68cb723898a63bd01568bbbd8c4f Mon Sep 17 00:00:00 2001
From: degasus <wickmarkus@web.de>
Date: Mon, 24 Feb 2014 10:20:53 +0100
Subject: [PATCH 3/9] MathUtil: Change Log2 return value to int

Log2(u64) can't be bigger than 63, so there is no need to force a 64-bit return value.
Using a plain int seems more natural.
---
 Source/Core/Common/MathUtil.h          | 4 ++--
 Source/Core/DolphinWX/GameListCtrl.cpp | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/Source/Core/Common/MathUtil.h b/Source/Core/Common/MathUtil.h
index da12822f5d..14b9309fc6 100644
--- a/Source/Core/Common/MathUtil.h
+++ b/Source/Core/Common/MathUtil.h
@@ -150,7 +150,7 @@ float MathFloatVectorSum(const std::vector<float>&);
 #define ROUND_DOWN(x, a) ((x) & ~((a) - 1))
 
 // Rounds down. 0 -> undefined
-inline u64 Log2(u64 val)
+inline int Log2(u64 val)
 {
 #if defined(__GNUC__)
 	return 63 - __builtin_clzll(val);
@@ -161,7 +161,7 @@ inline u64 Log2(u64 val)
 	return result;
 
 #else
-	u64 result = -1;
+	int result = -1;
 	while (val != 0)
 	{
 		val >>= 1;
diff --git a/Source/Core/DolphinWX/GameListCtrl.cpp b/Source/Core/DolphinWX/GameListCtrl.cpp
index bd420f8156..6029c741a4 100644
--- a/Source/Core/DolphinWX/GameListCtrl.cpp
+++ b/Source/Core/DolphinWX/GameListCtrl.cpp
@@ -413,12 +413,12 @@ wxString NiceSizeFormat(u64 _size)
 {
 	const char* const unit_symbols[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"};
 
-	auto const unit = Log2(std::max<u64>(_size, 1)) / 10;
-	auto const unit_size = (1 << (unit * 10));
+	const u64 unit = Log2(std::max<u64>(_size, 1)) / 10;
+	const u64 unit_size = (1 << (unit * 10));
 
 	// ugly rounding integer math
-	auto const value = (_size + unit_size / 2) / unit_size;
-	auto const frac = (_size % unit_size * 10 + unit_size / 2) / unit_size % 10;
+	const u64 value = (_size + unit_size / 2) / unit_size;
+	const u64 frac = (_size % unit_size * 10 + unit_size / 2) / unit_size % 10;
 
 	return StrToWxStr(StringFromFormat("%" PRIu64 ".%" PRIu64 " %s", value, frac, unit_symbols[unit]));
 }

From bd3beeb1843be807629bf9680fe7d27497606801 Mon Sep 17 00:00:00 2001
From: degasus <wickmarkus@web.de>
Date: Mon, 24 Feb 2014 16:09:20 +0100
Subject: [PATCH 4/9] TextureConverter: Use Log2() and shifts instead of
 multiplications/divisions

---
 .../VideoCommon/TextureConversionShader.cpp   | 23 ++++++++++---------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp
index 6b9e1af370..0fca9df552 100644
--- a/Source/Core/VideoCommon/TextureConversionShader.cpp
+++ b/Source/Core/VideoCommon/TextureConversionShader.cpp
@@ -14,6 +14,7 @@
 #include "VideoCommon/TextureConversionShader.h"
 #include "VideoCommon/TextureDecoder.h"
 #include "VideoCommon/VideoConfig.h"
+#include "Common/MathUtil.h"
 
 #define WRITE p+=sprintf
 
@@ -90,22 +91,22 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
 	"  float2 uv0 = float2(0.0, 0.0);\n"
 	);
 
-	WRITE(p, "  uv1.x = uv1.x * %d;\n", samples);
+	WRITE(p, "  uv1.x = uv1.x << %d;\n", Log2(samples));
 
-	WRITE(p, "  int yl = uv1.y / %d;\n", blkH);
-	WRITE(p, "  int yb = yl * %d;\n", blkH);
+	WRITE(p, "  int yl = uv1.y >> %d;\n", Log2(blkH));
+	WRITE(p, "  int yb = yl << %d;\n", Log2(blkH));
 	WRITE(p, "  int yoff = uv1.y - yb;\n");
 	WRITE(p, "  int xp = uv1.x + yoff * position.z;\n");
-	WRITE(p, "  int xel = xp / %d;\n", samples == 1 ? factor : blkW);
-	WRITE(p, "  int xb = xel / %d;\n", blkH);
-	WRITE(p, "  int xoff = xel - xb * %d;\n", blkH);
-	WRITE(p, "  int xl =  uv1.x * %d / %d;\n", factor, blkW);
-	WRITE(p, "  int xib = uv1.x * %d - xl * %d;\n", factor, blkW);
-	WRITE(p, "  int halfxb = xb / %d;\n", factor);
+	WRITE(p, "  int xel = xp >> %d;\n", Log2(samples == 1 ? factor : blkW));
+	WRITE(p, "  int xb = xel >> %d;\n", Log2(blkH));
+	WRITE(p, "  int xoff = xel - (xb << %d);\n", Log2(blkH));
+	WRITE(p, "  int xl = (uv1.x << %d) >> %d;\n", Log2(factor), Log2(blkW));
+	WRITE(p, "  int xib = (uv1.x << %d) - (xl << %d);\n", Log2(factor), Log2(blkW));
+	WRITE(p, "  int halfxb = xb >> %d;\n", Log2(factor));
 
-	WRITE(p, "  sampleUv.x = xib + halfxb * %d;\n", blkW);
+	WRITE(p, "  sampleUv.x = xib + (halfxb << %d);\n", Log2(blkW));
 	WRITE(p, "  sampleUv.y = yb + xoff;\n");
-	WRITE(p, "  bool first = xb == (halfxb * 2);\n");
+	WRITE(p, "  bool first = xb == (halfxb << 1);\n");
 }
 
 void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, API_TYPE ApiType)

From 8a4aa8c1f5ff0db2b531405bb087c1f53d2da4be Mon Sep 17 00:00:00 2001
From: degasus <wickmarkus@web.de>
Date: Mon, 24 Feb 2014 16:15:44 +0100
Subject: [PATCH 5/9] Rewrite texture tiling implementation

inline halfxb

So we know which is the first pixel by masking.

inline xl

inline xb a bit

inline yl

inline uv1.x shift

remove likely wrongly guessed ternary operator

add pixel layout comment

inline xel

optimize the shifts a bit

inline xb

optimize shifts in a second step

extract xb

rename all variables

calculate cache line by position.x

Revert 5115b459f40d53044cd7a858f52e6e876e1211b4 "optimize the shifts a bit"

It seems I was wrong; the other way is more natural.

use x_virtual_position instead of uv1.x for x_offset_in_block

This looks more natural and the offset should be masked anyway.

substitute factor with cache_lines

move 32bit logic into a conditional block
---
 .../VideoCommon/TextureConversionShader.cpp   | 32 +++++++++----------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp
index 0fca9df552..c85c028b93 100644
--- a/Source/Core/VideoCommon/TextureConversionShader.cpp
+++ b/Source/Core/VideoCommon/TextureConversionShader.cpp
@@ -66,8 +66,7 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
 	int blkW = TexDecoder_GetBlockWidthInTexels(format);
 	int blkH = TexDecoder_GetBlockHeightInTexels(format);
 	int samples = GetEncodedSampleCount(format);
-	// 32 bit textures (RGBA8 and Z24) are store in 2 cache line increments
-	int factor = samples == 1 ? 2 : 1;
+
 	if (ApiType == API_OPENGL)
 	{
 		WRITE(p, "#define samp0 samp9\n");
@@ -91,22 +90,21 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
 	"  float2 uv0 = float2(0.0, 0.0);\n"
 	);
 
-	WRITE(p, "  uv1.x = uv1.x << %d;\n", Log2(samples));
+	WRITE(p, "  int y_block_position = uv1.y & ~(%d - 1);\n", blkH);
+	WRITE(p, "  int y_offset_in_block = uv1.y & (%d - 1);\n", blkH);
+	WRITE(p, "  int x_virtual_position = (uv1.x << %d) + y_offset_in_block * position.z;\n", Log2(samples));
+	WRITE(p, "  int x_block_position = (x_virtual_position >> %d) & ~(%d - 1);\n", Log2(blkH), blkW);
+	if (samples == 1)
+	{
+		// 32 bit textures (RGBA8 and Z24) are store in 2 cache line increments
+		WRITE(p, "  bool first = 0 == (x_virtual_position & %d);\n", 8 * samples); // first cache line, used in the encoders
+		WRITE(p, "  x_virtual_position = x_virtual_position << 1;\n");
+	}
+	WRITE(p, "  int x_offset_in_block = x_virtual_position & (%d - 1);\n", blkW);
+	WRITE(p, "  int y_offset = (x_virtual_position >> %d) & (%d - 1);\n", Log2(blkW), blkH);
 
-	WRITE(p, "  int yl = uv1.y >> %d;\n", Log2(blkH));
-	WRITE(p, "  int yb = yl << %d;\n", Log2(blkH));
-	WRITE(p, "  int yoff = uv1.y - yb;\n");
-	WRITE(p, "  int xp = uv1.x + yoff * position.z;\n");
-	WRITE(p, "  int xel = xp >> %d;\n", Log2(samples == 1 ? factor : blkW));
-	WRITE(p, "  int xb = xel >> %d;\n", Log2(blkH));
-	WRITE(p, "  int xoff = xel - (xb << %d);\n", Log2(blkH));
-	WRITE(p, "  int xl = (uv1.x << %d) >> %d;\n", Log2(factor), Log2(blkW));
-	WRITE(p, "  int xib = (uv1.x << %d) - (xl << %d);\n", Log2(factor), Log2(blkW));
-	WRITE(p, "  int halfxb = xb >> %d;\n", Log2(factor));
-
-	WRITE(p, "  sampleUv.x = xib + (halfxb << %d);\n", Log2(blkW));
-	WRITE(p, "  sampleUv.y = yb + xoff;\n");
-	WRITE(p, "  bool first = xb == (halfxb << 1);\n");
+	WRITE(p, "  sampleUv.x = x_offset_in_block + x_block_position;\n");
+	WRITE(p, "  sampleUv.y = y_block_position + y_offset;\n");
 }
 
 void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, API_TYPE ApiType)

From 11efa88157f02e1e79edf947041cb0c6f2ca6c30 Mon Sep 17 00:00:00 2001
From: degasus <wickmarkus@web.de>
Date: Tue, 25 Feb 2014 16:06:55 +0100
Subject: [PATCH 6/9] calculate constant values on shader compilation

---
 Source/Core/VideoCommon/TextureConversionShader.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp
index c85c028b93..ce7f907bed 100644
--- a/Source/Core/VideoCommon/TextureConversionShader.cpp
+++ b/Source/Core/VideoCommon/TextureConversionShader.cpp
@@ -90,18 +90,18 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
 	"  float2 uv0 = float2(0.0, 0.0);\n"
 	);
 
-	WRITE(p, "  int y_block_position = uv1.y & ~(%d - 1);\n", blkH);
-	WRITE(p, "  int y_offset_in_block = uv1.y & (%d - 1);\n", blkH);
+	WRITE(p, "  int y_block_position = uv1.y & %d;\n", ~(blkH - 1));
+	WRITE(p, "  int y_offset_in_block = uv1.y & %d;\n", blkH - 1);
 	WRITE(p, "  int x_virtual_position = (uv1.x << %d) + y_offset_in_block * position.z;\n", Log2(samples));
-	WRITE(p, "  int x_block_position = (x_virtual_position >> %d) & ~(%d - 1);\n", Log2(blkH), blkW);
+	WRITE(p, "  int x_block_position = (x_virtual_position >> %d) & %d;\n", Log2(blkH), ~(blkW - 1));
 	if (samples == 1)
 	{
 		// 32 bit textures (RGBA8 and Z24) are store in 2 cache line increments
 		WRITE(p, "  bool first = 0 == (x_virtual_position & %d);\n", 8 * samples); // first cache line, used in the encoders
 		WRITE(p, "  x_virtual_position = x_virtual_position << 1;\n");
 	}
-	WRITE(p, "  int x_offset_in_block = x_virtual_position & (%d - 1);\n", blkW);
-	WRITE(p, "  int y_offset = (x_virtual_position >> %d) & (%d - 1);\n", Log2(blkW), blkH);
+	WRITE(p, "  int x_offset_in_block = x_virtual_position & %d;\n", blkW - 1);
+	WRITE(p, "  int y_offset = (x_virtual_position >> %d) & %d;\n", Log2(blkW), blkH - 1);
 
 	WRITE(p, "  sampleUv.x = x_offset_in_block + x_block_position;\n");
 	WRITE(p, "  sampleUv.y = y_block_position + y_offset;\n");

From 1d0b6a11564644f7fa86faedd164833b3c05f669 Mon Sep 17 00:00:00 2001
From: degasus <wickmarkus@web.de>
Date: Tue, 25 Feb 2014 22:21:15 +0100
Subject: [PATCH 7/9] Merge duplicate parts of sampler into header

---
 .../VideoCommon/TextureConversionShader.cpp   | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp
index ce7f907bed..1d2e01b8f2 100644
--- a/Source/Core/VideoCommon/TextureConversionShader.cpp
+++ b/Source/Core/VideoCommon/TextureConversionShader.cpp
@@ -87,7 +87,6 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
 	WRITE(p, "{\n"
 	"  int2 sampleUv;\n"
 	"  int2 uv1 = int2(gl_FragCoord.xy);\n"
-	"  float2 uv0 = float2(0.0, 0.0);\n"
 	);
 
 	WRITE(p, "  int y_block_position = uv1.y & %d;\n", ~(blkH - 1));
@@ -105,19 +104,21 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
 
 	WRITE(p, "  sampleUv.x = x_offset_in_block + x_block_position;\n");
 	WRITE(p, "  sampleUv.y = y_block_position + y_offset;\n");
+
+	WRITE(p, "  float2 uv0 = float2(sampleUv);\n");                // sampleUv is the sample position in (int)gx_coords
+	WRITE(p, "  uv0 += float2(0.5, 0.5);\n");                      // move to center of pixel
+	WRITE(p, "  uv0 *= float(position.w);\n");                     // scale by two if needed (this will move to pixels border to filter linear)
+	WRITE(p, "  uv0 += float2(position.xy);\n");                   // move to copyed rect
+	WRITE(p, "  uv0 /= float2(%d, %d);\n", EFB_WIDTH, EFB_HEIGHT); // normlize to [0:1]
+	WRITE(p, "  uv0.y = 1.0-uv0.y;\n");                            // ogl foo (disable this line for d3d)
+
+	WRITE(p, "  float sample_offset = position.w / float(%d);\n", EFB_WIDTH);
 }
 
 void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, API_TYPE ApiType)
 {
-	WRITE(p,                                          // sampleUv is the sample position in (int)gx_coords
-		"uv0 = float2(sampleUv + int2(%d, 0));\n" // pixel offset (if more than one pixel is samped)
-		"uv0 += float2(0.5, 0.5);\n"              // move to center of pixel
-		"uv0 *= float(position.w);\n"             // scale by two if needed (this will move to pixels border to filter linear)
-		"uv0 += float2(position.xy);\n"           // move to copyed rect
-		"uv0 /= float2(%d, %d);\n"                // normlize to [0:1]
-		"uv0.y = 1.0-uv0.y;\n"                    // ogl foo (disable this line for d3d)
-		"%s = texture(samp0, uv0).%s;\n",
-		xoffset, EFB_WIDTH, EFB_HEIGHT, dest, colorComp
+	WRITE(p, "  %s = texture(samp0, uv0 + float2(%d, 0) * sample_offset).%s;\n",
+		dest, xoffset, colorComp
 	);
 }
 

From aaaa5af0b23b25b1eab3cf37ea6f0cb70c125559 Mon Sep 17 00:00:00 2001
From: degasus <wickmarkus@web.de>
Date: Tue, 25 Feb 2014 15:52:22 +0100
Subject: [PATCH 8/9] remove (ATTR|VARY)(IN|OUT) macros

---
 .../VideoBackends/OGL/ProgramShaderCache.cpp  | 10 ++-----
 Source/Core/VideoBackends/OGL/RasterFont.cpp  |  8 +++---
 Source/Core/VideoBackends/OGL/Render.cpp      |  8 +++---
 .../Core/VideoBackends/OGL/TextureCache.cpp   |  6 ++---
 .../VideoBackends/OGL/TextureConverter.cpp    |  6 ++---
 Source/Core/VideoCommon/PixelShaderGen.cpp    | 10 +++----
 Source/Core/VideoCommon/VertexShaderGen.cpp   | 26 +++++++++----------
 7 files changed, 34 insertions(+), 40 deletions(-)

diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp
index 79826860a7..954755cb95 100644
--- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp
+++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp
@@ -452,12 +452,6 @@ void ProgramShaderCache::CreateHeader ( void )
 		// Precision defines for GLSLES3
 		"%s\n"
 
-		"\n"// A few required defines and ones that will make our lives a lot easier
-		"#define ATTRIN in\n"
-		"#define ATTROUT out\n"
-		"#define VARYIN %s\n"
-		"#define VARYOUT %s\n"
-
 		// Silly differences
 		"#define float2 vec2\n"
 		"#define float3 vec3\n"
@@ -472,6 +466,7 @@ void ProgramShaderCache::CreateHeader ( void )
 
 		// Terrible hack, look at DriverDetails.h
 		"%s\n"
+		"%s\n"
 
 		, v==GLSLES3 ? "#version 300 es" : v==GLSL_130 ? "#version 130" : v==GLSL_140 ? "#version 140" : "#version 150"
 		, v<GLSL_140 ? "#extension GL_ARB_uniform_buffer_object : enable" : ""
@@ -480,9 +475,8 @@ void ProgramShaderCache::CreateHeader ( void )
 
 		, v==GLSLES3 ? "precision highp float;" : ""
 
-		, DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "in" : "centroid in"
-		, DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "out" : "centroid out"
 		, DriverDetails::HasBug(DriverDetails::BUG_BROKENTEXTURESIZE) ? "#define textureSize(x, y) ivec2(1, 1)" : ""
+		, DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "#define centroid" : ""
 	);
 }
 
diff --git a/Source/Core/VideoBackends/OGL/RasterFont.cpp b/Source/Core/VideoBackends/OGL/RasterFont.cpp
index e5458ee455..1f2eefdb8b 100644
--- a/Source/Core/VideoBackends/OGL/RasterFont.cpp
+++ b/Source/Core/VideoBackends/OGL/RasterFont.cpp
@@ -115,9 +115,9 @@ const u8 rasters[char_count][char_height] = {
 
 static const char *s_vertexShaderSrc =
 	"uniform vec2 charSize;\n"
-	"ATTRIN vec2 rawpos;\n"
-	"ATTRIN vec2 tex0;\n"
-	"VARYOUT vec2 uv0;\n"
+	"in vec2 rawpos;\n"
+	"in vec2 tex0;\n"
+	"out vec2 uv0;\n"
 	"void main(void) {\n"
 	"	gl_Position = vec4(rawpos,0,1);\n"
 	"	uv0 = tex0 * charSize;\n"
@@ -126,7 +126,7 @@ static const char *s_vertexShaderSrc =
 static const char *s_fragmentShaderSrc =
 	"uniform sampler2D samp8;\n"
 	"uniform vec4 color;\n"
-	"VARYIN vec2 uv0;\n"
+	"in vec2 uv0;\n"
 	"out vec4 ocol0;\n"
 	"void main(void) {\n"
 	"	ocol0 = texture(samp8,uv0) * color;\n"
diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp
index 433b71847a..b4530a0bdb 100644
--- a/Source/Core/VideoBackends/OGL/Render.cpp
+++ b/Source/Core/VideoBackends/OGL/Render.cpp
@@ -652,14 +652,14 @@ void Renderer::Init()
 	s_pfont = new RasterFont();
 
 	ProgramShaderCache::CompileShader(s_ShowEFBCopyRegions,
-		"ATTRIN vec2 rawpos;\n"
-		"ATTRIN vec3 color0;\n"
-		"VARYOUT vec4 c;\n"
+		"in vec2 rawpos;\n"
+		"in vec3 color0;\n"
+		"out vec4 c;\n"
 		"void main(void) {\n"
 		"	gl_Position = vec4(rawpos, 0.0, 1.0);\n"
 		"	c = vec4(color0, 1.0);\n"
 		"}\n",
-		"VARYIN vec4 c;\n"
+		"in vec4 c;\n"
 		"out vec4 ocol0;\n"
 		"void main(void) {\n"
 		"	ocol0 = c;\n"
diff --git a/Source/Core/VideoBackends/OGL/TextureCache.cpp b/Source/Core/VideoBackends/OGL/TextureCache.cpp
index 29a2a13a27..37d96b88e7 100644
--- a/Source/Core/VideoBackends/OGL/TextureCache.cpp
+++ b/Source/Core/VideoBackends/OGL/TextureCache.cpp
@@ -346,7 +346,7 @@ TextureCache::TextureCache()
 	const char *pColorMatrixProg =
 		"uniform sampler2D samp9;\n"
 		"uniform vec4 colmat[7];\n"
-		"VARYIN vec2 uv0;\n"
+		"in vec2 uv0;\n"
 		"out vec4 ocol0;\n"
 		"\n"
 		"void main(){\n"
@@ -358,7 +358,7 @@ TextureCache::TextureCache()
 	const char *pDepthMatrixProg =
 		"uniform sampler2D samp9;\n"
 		"uniform vec4 colmat[5];\n"
-		"VARYIN vec2 uv0;\n"
+		"in vec2 uv0;\n"
 		"out vec4 ocol0;\n"
 		"\n"
 		"void main(){\n"
@@ -369,7 +369,7 @@ TextureCache::TextureCache()
 		"}\n";
 
 	const char *VProgram =
-		"VARYOUT vec2 uv0;\n"
+		"out vec2 uv0;\n"
 		"uniform sampler2D samp9;\n"
 		"uniform vec4 copy_position;\n" // left, top, right, bottom
 		"void main()\n"
diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/TextureConverter.cpp
index dbbb605f6b..514dbd22f6 100644
--- a/Source/Core/VideoBackends/OGL/TextureConverter.cpp
+++ b/Source/Core/VideoBackends/OGL/TextureConverter.cpp
@@ -68,7 +68,7 @@ void CreatePrograms()
 	 */
 	// Output is BGRA because that is slightly faster than RGBA.
 	const char *VProgramRgbToYuyv =
-		"VARYOUT vec2 uv0;\n"
+		"out vec2 uv0;\n"
 		"uniform vec4 copy_position;\n" // left, top, right, bottom
 		"uniform sampler2D samp9;\n"
 		"void main()\n"
@@ -79,7 +79,7 @@ void CreatePrograms()
 		"}\n";
 	const char *FProgramRgbToYuyv =
 		"uniform sampler2D samp9;\n"
-		"VARYIN vec2 uv0;\n"
+		"in vec2 uv0;\n"
 		"out vec4 ocol0;\n"
 		"void main()\n"
 		"{\n"
@@ -110,7 +110,7 @@ void CreatePrograms()
 		"}\n";
 	const char *FProgramYuyvToRgb =
 		"uniform sampler2D samp9;\n"
-		"VARYIN vec2 uv0;\n"
+		"in vec2 uv0;\n"
 		"out vec4 ocol0;\n"
 		"void main()\n"
 		"{\n"
diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp
index 66965139b8..e51294e55c 100644
--- a/Source/Core/VideoCommon/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/PixelShaderGen.cpp
@@ -311,19 +311,19 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
 		if (per_pixel_depth)
 			out.Write("#define depth gl_FragDepth\n");
 
-		out.Write("VARYIN float4 colors_02;\n");
-		out.Write("VARYIN float4 colors_12;\n");
+		out.Write("centroid in float4 colors_02;\n");
+		out.Write("centroid in float4 colors_12;\n");
 
 		// compute window position if needed because binding semantic WPOS is not widely supported
 		// Let's set up attributes
 		for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
 		{
-			out.Write("VARYIN float3 uv%d_2;\n", i);
+			out.Write("centroid in float3 uv%d_2;\n", i);
 		}
-		out.Write("VARYIN float4 clipPos_2;\n");
+		out.Write("centroid in float4 clipPos_2;\n");
 		if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
 		{
-			out.Write("VARYIN float4 Normal_2;\n");
+			out.Write("centroid in float4 Normal_2;\n");
 		}
 
 		if (forced_early_z)
diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp
index 67cb4c1231..3e2e02eecd 100644
--- a/Source/Core/VideoCommon/VertexShaderGen.cpp
+++ b/Source/Core/VideoCommon/VertexShaderGen.cpp
@@ -106,26 +106,26 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
 
 	if(api_type == API_OPENGL)
 	{
-		out.Write("ATTRIN float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB);
+		out.Write("in float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB);
 		if (components & VB_HAS_POSMTXIDX)
-			out.Write("ATTRIN float fposmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB);
+			out.Write("in float fposmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB);
 		if (components & VB_HAS_NRM0)
-			out.Write("ATTRIN float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB);
+			out.Write("in float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB);
 		if (components & VB_HAS_NRM1)
-			out.Write("ATTRIN float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB);
+			out.Write("in float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB);
 		if (components & VB_HAS_NRM2)
-			out.Write("ATTRIN float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB);
+			out.Write("in float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB);
 
 		if (components & VB_HAS_COL0)
-			out.Write("ATTRIN float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB);
+			out.Write("in float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB);
 		if (components & VB_HAS_COL1)
-			out.Write("ATTRIN float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB);
+			out.Write("in float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB);
 
 		for (int i = 0; i < 8; ++i)
 		{
 			u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<<i));
 			if ((components & (VB_HAS_UV0<<i)) || hastexmtx)
-				out.Write("ATTRIN float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i, SHADER_TEXTURE0_ATTRIB + i);
+				out.Write("in float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i, SHADER_TEXTURE0_ATTRIB + i);
 		}
 
 		// Let's set up attributes
@@ -133,15 +133,15 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
 		{
 			if (i < xfregs.numTexGen.numTexGens)
 			{
-				out.Write("VARYOUT  float3 uv%d_2;\n", i);
+				out.Write("centroid out  float3 uv%d_2;\n", i);
 			}
 		}
-		out.Write("VARYOUT   float4 clipPos_2;\n");
+		out.Write("centroid out   float4 clipPos_2;\n");
 		if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
-			out.Write("VARYOUT   float4 Normal_2;\n");
+			out.Write("centroid out   float4 Normal_2;\n");
 
-		out.Write("VARYOUT   float4 colors_02;\n");
-		out.Write("VARYOUT   float4 colors_12;\n");
+		out.Write("centroid out   float4 colors_02;\n");
+		out.Write("centroid out   float4 colors_12;\n");
 
 		out.Write("void main()\n{\n");
 	}

From f628695d31f09ad14dd9ae65c318470160698aa7 Mon Sep 17 00:00:00 2001
From: degasus <wickmarkus@web.de>
Date: Wed, 26 Feb 2014 12:48:52 +0100
Subject: [PATCH 9/9] comment fixes

---
 .../Core/VideoBackends/OGL/ProgramShaderCache.cpp   |  6 +++---
 Source/Core/VideoCommon/TextureConversionShader.cpp | 13 ++++++++-----
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp
index 954755cb95..0de9034f51 100644
--- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp
+++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp
@@ -464,9 +464,9 @@ void ProgramShaderCache::CreateHeader ( void )
 		"#define frac fract\n"
 		"#define lerp mix\n"
 
-		// Terrible hack, look at DriverDetails.h
-		"%s\n"
-		"%s\n"
+		// Terrible hacks, look at DriverDetails.h
+		"%s\n" // replace textureSize as constant
+		"%s\n" // wipe out all centroid usages
 
 		, v==GLSLES3 ? "#version 300 es" : v==GLSL_130 ? "#version 130" : v==GLSL_140 ? "#version 140" : "#version 150"
 		, v<GLSL_140 ? "#extension GL_ARB_uniform_buffer_object : enable" : ""
diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp
index 1d2e01b8f2..47f9df21ba 100644
--- a/Source/Core/VideoCommon/TextureConversionShader.cpp
+++ b/Source/Core/VideoCommon/TextureConversionShader.cpp
@@ -95,7 +95,7 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
 	WRITE(p, "  int x_block_position = (x_virtual_position >> %d) & %d;\n", Log2(blkH), ~(blkW - 1));
 	if (samples == 1)
 	{
-		// 32 bit textures (RGBA8 and Z24) are store in 2 cache line increments
+		// 32 bit textures (RGBA8 and Z24) are stored in 2 cache line increments
 		WRITE(p, "  bool first = 0 == (x_virtual_position & %d);\n", 8 * samples); // first cache line, used in the encoders
 		WRITE(p, "  x_virtual_position = x_virtual_position << 1;\n");
 	}
@@ -107,10 +107,13 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
 
 	WRITE(p, "  float2 uv0 = float2(sampleUv);\n");                // sampleUv is the sample position in (int)gx_coords
 	WRITE(p, "  uv0 += float2(0.5, 0.5);\n");                      // move to center of pixel
-	WRITE(p, "  uv0 *= float(position.w);\n");                     // scale by two if needed (this will move to pixels border to filter linear)
-	WRITE(p, "  uv0 += float2(position.xy);\n");                   // move to copyed rect
-	WRITE(p, "  uv0 /= float2(%d, %d);\n", EFB_WIDTH, EFB_HEIGHT); // normlize to [0:1]
-	WRITE(p, "  uv0.y = 1.0-uv0.y;\n");                            // ogl foo (disable this line for d3d)
+	WRITE(p, "  uv0 *= float(position.w);\n");                     // scale by two if needed (also move to pixel borders so that linear filtering will average adjacent pixel)
+	WRITE(p, "  uv0 += float2(position.xy);\n");                   // move to copied rect
+	WRITE(p, "  uv0 /= float2(%d, %d);\n", EFB_WIDTH, EFB_HEIGHT); // normalize to [0:1]
+	if (ApiType == API_OPENGL)                                     // ogl has to flip up and down
+	{
+		WRITE(p, "  uv0.y = 1.0-uv0.y;\n");
+	}
 
 	WRITE(p, "  float sample_offset = position.w / float(%d);\n", EFB_WIDTH);
 }