因工作上需要將url編碼轉換為utf8的正常編碼,
(這事能在程式端做,但想同時測試在DB中的效能比較)
因為完全就是收集網上高手的code,所以在此標明來源.
一開始在自己的開發環境使用下面的function都是正常的.
來源 : Mysql urlencode urldecode 函数
Encode
-- urlencode(s): form-style URL encoding of a string.
--   * NULL input returns NULL.
--   * A space becomes '+'.
--   * ASCII digits and letters (0-9, A-Z, a-z) are copied unchanged.
--   * Every other character is written as one '%XX' group per byte of its
--     encoding in the character set of s (upper-case hex digits).
-- The string is walked character by character (CHAR_LENGTH / SUBSTRING), and
-- HEX() of a character yields the hex of all of its bytes, so multi-byte
-- characters are encoded completely. Mixing the byte count LENGTH() with the
-- character-based MID(), and using ASCII() (leftmost byte only), would encode
-- just the first byte of such a character and run past the end of the string.
-- NOTE: the result can be several times longer than the input; it is still
-- limited by the declared VARCHAR(4096) return type.
DROP FUNCTION IF EXISTS urlencode;
DELIMITER |
CREATE FUNCTION urlencode (s VARCHAR(4096)) RETURNS VARCHAR(4096)
DETERMINISTIC
CONTAINS SQL
BEGIN
    -- current character; VARCHAR so that a single space is kept as ' '
    DECLARE ch VARCHAR(1);
    -- numeric code of the current character (above 255 for multi-byte characters)
    DECLARE code BIGINT;
    -- one-based character index into s, and the total number of characters
    DECLARE char_pos INT DEFAULT 1;
    DECLARE char_count INT;
    -- hex digits of all bytes of the current character, two digits per byte
    DECLARE hex_bytes VARCHAR(16);
    DECLARE hex_pos INT;
    -- the encoded string being built
    DECLARE encoded VARCHAR(4096) DEFAULT '';

    IF s IS NULL THEN
        RETURN NULL;
    END IF;

    SET char_count = CHAR_LENGTH(s);
    WHILE char_pos <= char_count DO
        SET ch = SUBSTRING(s, char_pos, 1);
        SET code = ORD(ch);
        IF code = 32 THEN
            SET encoded = CONCAT(encoded, '+');
        ELSEIF code BETWEEN 48 AND 57          -- 0-9
            OR code BETWEEN 65 AND 90          -- A-Z
            OR code BETWEEN 97 AND 122 THEN    -- a-z
            SET encoded = CONCAT(encoded, ch);
        ELSE
            -- emit '%XX' for each byte, most significant byte first
            SET hex_bytes = HEX(ch);
            SET hex_pos = 1;
            WHILE hex_pos < LENGTH(hex_bytes) DO
                SET encoded = CONCAT(encoded, '%', SUBSTRING(hex_bytes, hex_pos, 2));
                SET hex_pos = hex_pos + 2;
            END WHILE;
        END IF;
        SET char_pos = char_pos + 1;
    END WHILE;

    RETURN encoded;
END;
|
DELIMITER ;
Decode
-- urldecode(s): decode a form-style URL-encoded string.
--   * NULL input returns NULL.
--   * '+' becomes a space.
--   * '%XX' (two hex digits, either case) becomes the byte 0xXX.
--   * A '%' that is not followed by two hex digits is copied unchanged.
-- Decoding is done on bytes: the input is converted to utf8mb4 bytes, decoded
-- bytes are collected in a binary buffer, and the buffer is interpreted as
-- UTF-8 once at the end. Storing each decoded byte on its own in a character
-- variable fails for multi-byte characters, because a single lead byte such as
-- 0xE9 is not a valid string by itself (Error 1366 "Incorrect string value").
-- NOTE(review): the result is converted to the declared return type, whose
-- character set is the database default; characters that character set cannot
-- represent will not survive. Escapes that do not form valid UTF-8 cannot be
-- converted; what MySQL returns in that case depends on version / sql_mode.
DROP FUNCTION IF EXISTS urldecode;
DELIMITER |
CREATE FUNCTION urldecode (s VARCHAR(4096)) RETURNS VARCHAR(4096)
DETERMINISTIC
CONTAINS SQL
BEGIN
    -- input as raw UTF-8 bytes (up to 4 bytes for each of the 4096 characters)
    DECLARE src VARBINARY(16384);
    DECLARE src_len INT;
    -- one-based byte index into src
    DECLARE pos INT DEFAULT 1;
    -- byte to append in the current iteration
    DECLARE cur VARBINARY(1);
    -- the two candidate hex digits following a '%'
    DECLARE h1 VARBINARY(1);
    DECLARE h2 VARBINARY(1);
    -- decoded bytes collected so far
    DECLARE decoded VARBINARY(16384) DEFAULT '';

    IF s IS NULL THEN
        RETURN NULL;
    END IF;

    SET src = CAST(CONVERT(s USING utf8mb4) AS BINARY);
    SET src_len = LENGTH(src);

    WHILE pos <= src_len DO
        SET cur = SUBSTRING(src, pos, 1);
        IF cur = '+' THEN
            SET cur = ' ';
        ELSEIF cur = '%' AND pos + 2 <= src_len THEN
            SET h1 = SUBSTRING(src, pos + 1, 1);
            SET h2 = SUBSTRING(src, pos + 2, 1);
            -- binary comparison: exact byte ranges, independent of any collation
            IF (h1 BETWEEN '0' AND '9' OR h1 BETWEEN 'A' AND 'F' OR h1 BETWEEN 'a' AND 'f')
               AND
               (h2 BETWEEN '0' AND '9' OR h2 BETWEEN 'A' AND 'F' OR h2 BETWEEN 'a' AND 'f')
            THEN
                SET cur = UNHEX(CONCAT(h1, h2));
                -- skip the two hex digits that were just consumed
                SET pos = pos + 2;
            END IF;
        END IF;
        SET decoded = CONCAT(decoded, cur);
        SET pos = pos + 1;
    END WHILE;

    -- interpret the collected bytes as UTF-8 text in a single step
    RETURN CONVERT(decoded USING utf8mb4);
END;
|
DELIMITER ;
但換到RD的環境時,解碼的部分卻都顯示?????? 或者出現Error 1366 : Incorrect string value '\xE9' for column 'c'
參考 : mysql中Incorrect string value乱码问题解决方案
後來發現是utf8和utf8mb4的問題,不能存到utf8的varchar變數內.
因為MySQL的utf8每個字元最多以3 bytes來實作unicode的存放,而utf8mb4 (max bytes 4)每個字元可以存到4個bytes,就不會出錯.
而character與collation設定有分很多部分,(database, table, column, server, client, result, ...)頗麻煩.
而最後的情況是在utf8的環境上執行,所以下面又去找了utf8直接可以用的decode/encode functions.
(上面的就給utf8mb4使用)
來源 : mysql urlencode 支持中文
Encode
-- URLENCODE(str): percent-encode a utf8 string.
--   * NULL input returns NULL.
--   * Unreserved characters (0-9, A-Z, a-z, '-', '.', '_', '~'; see
--     http://tools.ietf.org/html/rfc3986) are copied unchanged.
--   * Every other character, including a space, is written as one '%XX' group
--     per byte of the character, most significant byte first.
DELIMITER ;
DROP FUNCTION IF EXISTS urlencode;
DELIMITER |
CREATE FUNCTION URLENCODE(str VARCHAR(4096) CHARSET utf8) RETURNS VARCHAR(4096) CHARSET utf8
DETERMINISTIC
CONTAINS SQL
BEGIN
    -- current character; VARCHAR rather than CHAR(1) because CHAR(1) combined
    -- with SUBSTRING turned a space into '' instead of ' '
    DECLARE ch VARCHAR(1) CHARSET utf8;
    -- ordinal value of the character as returned by ORD (e.g. ñ gives 50097)
    DECLARE code BIGINT DEFAULT 0;
    -- one-based character index into str
    DECLARE char_pos INT DEFAULT 1;
    -- number of bytes of the current character still to be written
    DECLARE bytes_left INT DEFAULT 0;
    -- the encoded string being built
    DECLARE encoded VARCHAR(4096) DEFAULT '';

    IF str IS NULL THEN
        RETURN NULL;
    END IF;

    -- walk the input one character at a time, however many bytes each one has
    char_loop: LOOP
        IF char_pos > CHAR_LENGTH(str) THEN
            LEAVE char_loop;
        END IF;

        SET ch = SUBSTRING(str, char_pos, 1);
        SET code = ORD(ch);

        IF code BETWEEN 48 AND 57           -- 0-9
            OR code BETWEEN 65 AND 90       -- A-Z
            OR code BETWEEN 97 AND 122      -- a-z
            OR code IN (45, 46, 95, 126)    -- hyphen, period, underscore, tilde
        THEN
            -- unreserved character: copied as-is
            SET encoded = CONCAT(encoded, ch);
        ELSE
            -- ORD packs the bytes of the character into one integer with the
            -- first byte in the highest position; shift the wanted byte down
            -- to the lowest position and mask it out, highest byte first
            SET bytes_left = OCTET_LENGTH(ch);
            WHILE bytes_left > 0 DO
                SET encoded = CONCAT(
                    encoded,
                    '%',
                    LPAD(HEX((code >> (8 * (bytes_left - 1))) & 255), 2, '0')
                );
                SET bytes_left = bytes_left - 1;
            END WHILE;
        END IF;

        SET char_pos = char_pos + 1;
    END LOOP char_loop;

    RETURN encoded;
END;
|
DELIMITER ;
來源 : 支持utf8的mysql urldecode funtion
Decode
-- url_decode(original_text): decode a form-style URL-encoded string to utf8.
--   * NULL input returns NULL.
--   * '+' becomes a space. This is done before any escape is decoded, so a
--     '+' produced by decoding '%2B' stays a '+'.
--   * Each run of consecutive well-formed '%XX' escapes is decoded as one
--     unit, so the bytes of a multi-byte UTF-8 character are converted together.
--   * A '%' that does not start a well-formed escape is kept as it is.
--   * A run whose bytes cannot be converted to utf8 is kept in its encoded form.
--   * '%0A' decodes to a single line feed like any other escape.
-- The decoded text replaces the run at its own position only, and scanning
-- resumes after the inserted text, so decoded output is never decoded again
-- (for example '%2541' yields '%41', not 'A').
-- NOTE(review): the hard-coded DEFINER is kept from the original definition;
-- creating the function requires that this account is acceptable on the
-- target server - confirm before deploying.
DELIMITER $$
DROP FUNCTION IF EXISTS `url_decode` $$
CREATE DEFINER=`root`@`%` FUNCTION `url_decode`(original_text TEXT) RETURNS TEXT CHARSET utf8
    DETERMINISTIC
    CONTAINS SQL
BEGIN
    -- working copy of the text; utf8 so decoded characters fit whatever the
    -- database default character set is
    DECLARE new_text TEXT CHARSET utf8 DEFAULT NULL;
    -- position of the '%' currently being examined (0 when none is left)
    DECLARE pointer INT DEFAULT 1;
    -- position just past the run of escapes that starts at pointer
    DECLARE end_pointer INT DEFAULT 1;
    -- the run of '%XX' escapes, exactly as it appears in the text
    DECLARE encoded_text TEXT DEFAULT NULL;
    -- the run decoded to utf8 text
    DECLARE result_text TEXT CHARSET utf8 DEFAULT NULL;

    IF original_text IS NULL THEN
        RETURN NULL;
    END IF;

    SET new_text = REPLACE(original_text, '+', ' ');
    SET pointer = LOCATE('%', new_text);

    WHILE pointer > 0 DO
        -- extend the run while the next three characters are '%' + 2 hex digits
        SET end_pointer = pointer;
        WHILE MID(new_text, end_pointer, 3) REGEXP '^%[0-9A-Fa-f][0-9A-Fa-f]' DO
            SET end_pointer = end_pointer + 3;
        END WHILE;

        IF end_pointer = pointer THEN
            -- literal '%': leave it and continue with the next one
            SET pointer = LOCATE('%', new_text, pointer + 1);
        ELSE
            SET encoded_text = MID(new_text, pointer, end_pointer - pointer);
            SET result_text = CONVERT(UNHEX(REPLACE(encoded_text, '%', '')) USING utf8);
            IF result_text IS NULL THEN
                -- bytes are not convertible to utf8: keep the run encoded
                SET pointer = LOCATE('%', new_text, end_pointer);
            ELSE
                -- replace only this occurrence, then resume after the new text
                SET new_text = INSERT(new_text, pointer, end_pointer - pointer, result_text);
                SET pointer = LOCATE('%', new_text, pointer + CHAR_LENGTH(result_text));
            END IF;
        END IF;
    END WHILE;

    RETURN new_text;
END $$
DELIMITER ;