[記錄用]MariaDB收集的url decode/encode functions－Aaron Yang｜痞客邦

Aug 30 Wed 2017 11:38
[記錄用]MariaDB收集的url decode/encode functions

close

因工作上需要將url編碼轉換為utf8的正常編碼,

(這事能在程式端做,但想同時測試在DB中的效能比較)

因為完全就是收集網上高手的code,在此標明來源.

一開始在自己的開發環境使用下面的function都是正常的.

來源 : Mysql urlencode urldecode 函数

Encode

-- urlencode(s): percent-encode a string in the form-encoding style.
--   * NULL input returns NULL.
--   * A space becomes a plus sign.
--   * ASCII digits and letters are copied unchanged.
--   * Every other byte becomes a percent sign followed by two uppercase hex digits.
-- The string is walked byte by byte, so each byte of a multi-byte character
-- gets its own escape. The bytes are those of the parameter as stored in its
-- own character set (none is declared, so presumably the database default).
-- NOTE(review): the result type is still VARCHAR(4096) and an encoded string
-- can be up to three times longer than the input, so very long inputs can
-- overflow the result.
DROP FUNCTION IF EXISTS urlencode;
DELIMITER |
CREATE FUNCTION urlencode (s VARCHAR(4096)) RETURNS VARCHAR(4096)
DETERMINISTIC
CONTAINS SQL
BEGIN
    -- Raw bytes of the input; SUBSTRING and LENGTH on a binary string both
    -- count bytes, so the loop bound and the index always agree.
    DECLARE raw_bytes BLOB;
    DECLARE byte_count INT DEFAULT 0;
    DECLARE pos INT DEFAULT 1;
    DECLARE cur_byte VARBINARY(1);
    DECLARE code INT DEFAULT 0;
    DECLARE encoded VARCHAR(4096) DEFAULT '';

    IF s IS NULL THEN
        RETURN NULL;
    END IF;

    SET raw_bytes = CAST(s AS BINARY);
    SET byte_count = LENGTH(raw_bytes);

    WHILE pos <= byte_count DO
        SET cur_byte = SUBSTRING(raw_bytes, pos, 1);
        SET code = ORD(cur_byte);
        IF code = 32 THEN
            -- space
            SET encoded = CONCAT(encoded, '+');
        ELSEIF (code BETWEEN 48 AND 57)      -- 0-9
            OR (code BETWEEN 65 AND 90)      -- A-Z
            OR (code BETWEEN 97 AND 122)     -- a-z
        THEN
            SET encoded = CONCAT(encoded, CHAR(code USING ascii));
        ELSE
            -- HEX of a single byte is always exactly two uppercase digits.
            SET encoded = CONCAT(encoded, '%', HEX(cur_byte));
        END IF;
        SET pos = pos + 1;
    END WHILE;

    RETURN encoded;
END;
|
DELIMITER ;

Decode

-- urldecode(s): reverse of form-style percent-encoding.
--   * NULL input returns NULL.
--   * A plus sign becomes a space.
--   * A percent sign followed by two hex digits (either case) becomes the
--     byte with that value.
--   * A percent sign that is not followed by two hex digits is kept as is.
-- Decoded bytes are collected in a binary buffer and turned back into a
-- character string only once, at the end. Converting byte by byte fails for
-- multi-byte characters, because a lone lead or continuation byte is not a
-- valid string on its own (Error 1366, or question marks in non-strict mode).
-- NOTE(review): the buffer is interpreted in the character set of the return
-- type (none is declared, so presumably the database default). Sequences
-- that are not valid in that character set are still rejected or replaced
-- by the server.
DROP FUNCTION IF EXISTS urldecode;
DELIMITER |
CREATE FUNCTION urldecode (s VARCHAR(4096)) RETURNS VARCHAR(4096)
DETERMINISTIC
CONTAINS SQL
BEGIN
    -- Input as raw bytes, so LENGTH and SUBSTRING use the same unit.
    DECLARE src BLOB;
    DECLARE src_len INT DEFAULT 0;
    DECLARE pos INT DEFAULT 1;
    DECLARE cur_byte VARBINARY(1);
    DECLARE hex_pair VARBINARY(2);
    -- Decoded output as raw bytes.
    DECLARE buf BLOB DEFAULT '';
    DECLARE decoded VARCHAR(4096);

    IF s IS NULL THEN
        RETURN NULL;
    END IF;

    SET src = CAST(s AS BINARY);
    SET src_len = LENGTH(src);

    WHILE pos <= src_len DO
        SET cur_byte = SUBSTRING(src, pos, 1);
        IF cur_byte = '+' THEN
            SET buf = CONCAT(buf, ' ');
        ELSEIF cur_byte = '%' AND pos + 2 <= src_len THEN
            SET hex_pair = SUBSTRING(src, pos + 1, 2);
            IF hex_pair REGEXP '^[0-9A-Fa-f]{2}$' THEN
                SET buf = CONCAT(buf, UNHEX(hex_pair));
                -- Skip the two hex digits that were just consumed.
                SET pos = pos + 2;
            ELSE
                -- Not a valid escape: keep the percent sign literally.
                SET buf = CONCAT(buf, cur_byte);
            END IF;
        ELSE
            SET buf = CONCAT(buf, cur_byte);
        END IF;
        SET pos = pos + 1;
    END WHILE;

    -- Single binary-to-character conversion of the whole decoded sequence.
    SET decoded = buf;
    RETURN decoded;
END;
|
DELIMITER ;

但換到RD的環境時,解碼的部分卻都顯示?????? 或者出現Error 1366 : Incorrect string value '\xE9' for column 'c'
參考 : mysql中Incorrect string value乱码问题解决方案

後來發現是utf8和utf8mb4的問題,不能存到utf8的varchar變數內.

因為utf8是以3 bytes來實作unicode的存放,而utf8mb4 (max bytes 4)可以存到4個bytes,就不會出錯.

而character與collation設定有分很多部分,(database, table, column, server, client, result, ...)頗麻煩.

而最後的情況是在utf8的環境上執行,所以下面又去找了utf8直接可以用的decode/encode functions.

(上面的就給utf8mb4使用)

來源 : mysql urlencode 支持中文

Encode

DELIMITER ;

DROP FUNCTION IF EXISTS urlencode;

DELIMITER |

CREATE FUNCTION URLENCODE(str VARCHAR(4096) CHARSET utf8) RETURNS VARCHAR(4096) CHARSET utf8
DETERMINISTIC
CONTAINS SQL
BEGIN
    -- Percent-encode a utf8 string, one character at a time.
    -- Unreserved characters (see http://tools.ietf.org/html/rfc3986) are
    -- copied unchanged: 0-9, A-Z, a-z, hyphen, period, underscore, tilde.
    -- Every other character is written as one escape per byte, most
    -- significant byte first. NULL input returns NULL.

    -- Current character. Kept as VARCHAR rather than CHAR(1) because the
    -- original author found that CHAR(1) turned a single space into an
    -- empty string.
    DECLARE ch VARCHAR(1) CHARSET utf8;
    -- ORD of the current character (all of its bytes packed into one number).
    DECLARE code BIGINT DEFAULT 0;
    -- One-based character position in the input.
    DECLARE pos INT DEFAULT 1;
    -- Value (0 to 255) of the byte currently being written.
    DECLARE byte_val INT DEFAULT 0;
    -- Encoded output built up so far.
    DECLARE encoded VARCHAR(4096) DEFAULT '';
    -- Number of bytes of the current character still to be written.
    DECLARE bytes_left INT DEFAULT 0;

    IF str IS NULL THEN
        RETURN NULL;
    END IF;

    WHILE pos <= CHAR_LENGTH(str) DO
        SET ch = SUBSTRING(str, pos, 1);
        SET code = ORD(ch);

        IF (code BETWEEN 48 AND 57)         -- 0-9
            OR (code BETWEEN 65 AND 90)     -- A-Z
            OR (code BETWEEN 97 AND 122)    -- a-z
            OR code IN (45, 46, 95, 126)    -- hyphen, period, underscore, tilde
        THEN
            -- Unreserved: append as is.
            SET encoded = CONCAT(encoded, ch);
        ELSE
            -- Write the bytes from the highest to the lowest by shifting the
            -- wanted byte down to the lowest position and masking the rest.
            SET bytes_left = OCTET_LENGTH(ch);
            WHILE bytes_left > 0 DO
                SET bytes_left = bytes_left - 1;
                SET byte_val = (code >> (8 * bytes_left)) & 255;
                SET encoded = CONCAT(encoded, '%', LPAD(HEX(byte_val), 2, '0'));
            END WHILE;
        END IF;

        SET pos = pos + 1;
    END WHILE;

    RETURN encoded;
END;

|

DELIMITER ;

來源 : 支持utf8的mysql urldecode funtion

Decode

DELIMITER $$

DROP FUNCTION IF EXISTS `url_decode` $$
CREATE FUNCTION `url_decode`(original_text TEXT) RETURNS TEXT CHARSET utf8
DETERMINISTIC
CONTAINS SQL
BEGIN
    -- Decode a percent-encoded string whose escapes are UTF-8 bytes.
    --   * NULL input returns NULL.
    --   * A plus sign becomes a space (done before decoding, so an escaped
    --     plus sign survives as a plus sign).
    --   * A percent sign followed by two hex digits becomes that byte.
    --   * A percent sign not followed by two hex digits is kept literally.
    -- The input is scanned once from left to right, so text produced by
    -- decoding is never decoded again, and escapes at the very end of the
    -- input are handled like any other. Escaped line feeds decode to a plain
    -- line feed and are not expanded to a carriage return plus line feed.
    -- No DEFINER clause is given, so the routine belongs to the account that
    -- creates it instead of depending on a particular root account existing.

    -- Input after plus-sign replacement, as UTF-8 bytes.
    DECLARE src MEDIUMBLOB;
    -- Decoded output, as bytes.
    DECLARE buf MEDIUMBLOB DEFAULT '';
    -- Position where the next scan starts (one-based, in bytes).
    DECLARE pos INT DEFAULT 1;
    -- Position of the next percent sign, or 0 when there is none.
    DECLARE pct INT DEFAULT 0;
    DECLARE hex_pair VARBINARY(2);

    IF original_text IS NULL THEN
        RETURN NULL;
    END IF;

    SET src = CAST(CONVERT(REPLACE(original_text, '+', ' ') USING utf8) AS BINARY);

    decode_loop: LOOP
        SET pct = LOCATE('%', src, pos);
        IF pct = 0 THEN
            -- No more escapes: copy the remaining tail and stop.
            SET buf = CONCAT(buf, SUBSTRING(src, pos));
            LEAVE decode_loop;
        END IF;

        -- Copy the literal run in front of the percent sign in one step.
        SET buf = CONCAT(buf, SUBSTRING(src, pos, pct - pos));

        -- Shorter than two bytes when the percent sign is near the end,
        -- in which case the pattern below does not match.
        SET hex_pair = SUBSTRING(src, pct + 1, 2);
        IF hex_pair REGEXP '^[0-9A-Fa-f]{2}$' THEN
            SET buf = CONCAT(buf, UNHEX(hex_pair));
            SET pos = pct + 3;
        ELSE
            SET buf = CONCAT(buf, '%');
            SET pos = pct + 1;
        END IF;
    END LOOP;

    -- Interpret the whole byte sequence as utf8 once, so multi-byte
    -- characters are assembled from all of their bytes together.
    RETURN CONVERT(buf USING utf8);

END $$

DELIMITER ;

urldecode urlencode mysql mariadb

abcg5

Aaron Yang

abcg5 發表在痞客邦留言(0) 人氣()

E-mail轉寄

全站分類：數位生活
個人分類：其他資料庫
此分類上一篇： MariaDB Lock - level比較
此分類下一篇：修復table marked as crashed
上一篇： MariaDB Lock - level比較
下一篇：修復table marked as crashed

留言列表

站方公告

活動快報

【船井...

newdirect

船井生醫推出專門針對高度用眼族、輕熟齡者的葉黃素... 看更多活動好康

我的好友

熱門文章

文章分類

最新文章

最新留言

動態訂閱

文章精選

所有文章列表

文章搜尋

新聞交換(RSS)

誰來我家

參觀人氣

本日人氣：
累積人氣：

QR Code

qrcode

POWERED BY

(登入)