SQLで組合せ最適化(メモ)
http://sqlzoo.net/wiki/SELECT_from_BBC_Tutorial
上記のSQL SERVERで試した
■自然数列挙
WITH Input(iStart, iEnd) AS (SELECT 1, 10) , NaturalNumber(n) AS ( SELECT iStart FROM Input UNION ALL SELECT n + 1 FROM NaturalNumber INNER JOIN Input ON NaturalNumber.n + 1 <= Input.iEnd ) SELECT n FROM NaturalNumber ORDER BY n
■全パターン列挙
順列・組合せ、ではなく、1ノードからnノードの組まで列挙
WITH Input(iStart, iEnd) AS (SELECT 1, 8) , NaturalNumber(n) AS ( SELECT iStart FROM Input UNION ALL SELECT n + 1 FROM NaturalNumber INNER JOIN Input ON NaturalNumber.n + 1 <= Input.iEnd ) , Enum(id, idList) AS ( SELECT n, CAST(n AS VARCHAR(MAX)) FROM NaturalNumber UNION ALL SELECT b.n, CAST(a.idList + ',' + CAST(b.n AS VARCHAR(MAX)) AS VARCHAR(MAX)) FROM Enum a JOIN NaturalNumber b ON a.id < b.n ) SELECT * FROM Enum ORDER BY len(idList), idList
■巡回セールスマン全探索
WITH Nodes(n, name) AS ( SELECT 1, 'チューリッヒ' UNION ALL SELECT 2, 'ロンドン' UNION ALL SELECT 3, 'ベルリン' UNION ALL SELECT 4, 'ローマ' UNION ALL SELECT 5, 'マドリッド' ) , Edges(path, eg, cost) AS ( SELECT '1,2', 'チューリッヒ,ロンドン', 476 UNION ALL SELECT '2,1', 'ロンドン,チューリッヒ', 476 UNION ALL SELECT '2,3', 'ロンドン,ベルリン', 569 UNION ALL SELECT '3,2', 'ベルリン,ロンドン', 569 UNION ALL SELECT '3,1', 'ベルリン,チューリッヒ', 408 UNION ALL SELECT '1,3', 'チューリッヒ,ベルリン', 408 UNION ALL SELECT '4,3', 'ローマ,ベルリン', 736 UNION ALL SELECT '3,4', 'ベルリン,ローマ', 736 UNION ALL SELECT '4,1', 'ローマ,チューリッヒ', 434 UNION ALL SELECT '1,4', 'チューリッヒ,ローマ', 434 UNION ALL SELECT '4,2', 'ローマ,ロンドン', 894 UNION ALL SELECT '2,4', 'ロンドン,ローマ', 894 UNION ALL SELECT '5,4', 'マドリッド,ローマ', 852 UNION ALL SELECT '4,5', 'ローマ,マドリッド', 852 UNION ALL SELECT '5,1', 'マドリッド,チューリッヒ', 774 UNION ALL SELECT '1,5', 'チューリッヒ,マドリッド', 774 UNION ALL SELECT '5,2', 'マドリッド,ロンドン', 784 UNION ALL SELECT '2,5', 'ロンドン,マドリッド', 784 UNION ALL SELECT '5,3', 'マドリッド,ベルリン', 1154 UNION ALL SELECT '3,5', 'ベルリン,マドリッド', 1154 ) -- pathの組合せ列挙し積上げる , Head AS (SELECT n, name FROM Nodes WHERE name = 'チューリッヒ') , Enum(id, idList, pList) AS ( SELECT n, CAST(n AS VARCHAR(MAX)), CAST(name AS VARCHAR(MAX)) FROM Head UNION ALL SELECT b.n , CAST(a.idList + ',' + CAST(b.n AS VARCHAR(MAX)) AS VARCHAR(MAX)) , a.pList + ',' + (SELECT SUBSTRING(e.eg, CHARINDEX(',', e.eg)+1, 999) FROM Edges e WHERE e.path = CAST(a.id AS VARCHAR(MAX)) + ',' + CAST(b.n AS VARCHAR(MAX))) FROM Enum a JOIN Nodes b ON CHARINDEX(CAST(b.n AS VARCHAR(MAX)), a.idList) = 0 ) -- 制約・整形 , Domain(idList, pList) AS ( SELECT idList + ',1', pList + ',{*}' FROM Enum WHERE LEN(idList) = LEN('1,2,3,4,5') AND CHARINDEX('1', idList) = 1 ) -- Rank , Rank(idList, pList, cost) AS ( SELECT c.idList, c.pList, SUM(cost) FROM Domain c LEFT JOIN Edges e ON CHARINDEX(e.path, c.idList) > 0 GROUP BY c.idList, c.pList ) -- 最適化 SELECT * FROM Rank ORDER BY cost
■ナップザック問題全探索
WITH goods AS ( SELECT 1 id, 7 val,10 kg UNION ALL SELECT 2 id, 2 val, 4 kg UNION ALL SELECT 3 id, 9 val, 5 kg UNION ALL SELECT 4 id, 4 val, 1 kg UNION ALL SELECT 5 id, 9 val, 7 kg UNION ALL SELECT 6 id, 7 val, 3 kg UNION ALL SELECT 7 id, 4 val, 6 kg UNION ALL SELECT 8 id, 5 val, 3 kg ) ,rec(id,idList,sumVal,SumKg) as -- 最初の種はid:1からid:8まで (SELECT id, CAST(id AS varchar(max)), val, kg FROM goods UNION ALL -- 種に対して、idが上の(大きい)レコードを積上げる(重複排除) SELECT b.id, CAST(a.idList + ',' + CAST(b.id AS varchar(max)) AS varchar(max)) , a.sumVal+b.val , a.SumKg+b.kg FROM rec a JOIN goods b ON a.id < b.id -- 制約条件 WHERE a.SumKg+b.kg <= 20 ) ,tmp AS (SELECT id, idList, sumVal, sumKg , max(sumVal) over() AS maxSumVal FROM rec) SELECT id, idList, sumVal, sumKg, maxSumVal FROM tmp --ORDER BY LEN(idList), idList -- 目的関数 WHERE sumVal = maxSumVal
■最小木問題
クラスカル法では、素集合データ構造を使用して閉路が
無いことを確認するが、SQLなので、グローバル変数を
導入するのに、スクリプトを導入することになる。
それでは、本末転倒なので、怪しい方法をとった。
これは、正しいか ?
WITH Nodes(n, name) AS ( SELECT 1, 'アリゲータ' UNION ALL SELECT 2, 'ブル' UNION ALL SELECT 3, 'ホワイトベア' UNION ALL SELECT 4, 'シャーク' UNION ALL SELECT 5, 'コンドル' ) , Edges(path, eg, cost) AS ( SELECT '1,2', 'アリゲータ,ブル', 1 UNION ALL SELECT '1,3', 'アリゲータ,ホワイトベア', 2 UNION ALL SELECT '2,3', 'ブル,ホワイトベア', 1 UNION ALL SELECT '2,4', 'ブル,シャーク', 3 UNION ALL SELECT '3,4', 'ホワイトベア,シャーク', 5 UNION ALL SELECT '3,5', 'ホワイトベア,コンドル', 3 UNION ALL SELECT '4,5', 'シャーク,コンドル', 5 ) , Edges2 AS ( SELECT ROW_NUMBER() OVER(ORDER BY cost, path) AS id , SUBSTRING(e.path, 1, CHARINDEX(',', e.path)-1) AS head , SUBSTRING(e.path, CHARINDEX(',', e.path)+1, 999) AS tail , e.* FROM Edges e ) -- 種を元にSTEP毎に、最初コストの辺を付与して積上げる , Enum(prev, idList, step) AS ( SELECT n, CAST('' AS VARCHAR(MAX)), 1 FROM Nodes UNION ALL SELECT a.prev , CASE WHEN CAST(a.prev AS VARCHAR(MAX)) IN (e.head, e.tail) AND LEN(a.idList) = 0 THEN e.path ELSE a.idList END , a.step + 1 FROM Enum a JOIN Edges2 e ON a.step = e.id ) , IDCount AS ( SELECT step, COUNT(*) AS cnt FROM Enum WHERE 1 < step GROUP BY step, LEN(idList) ) -- 最適化 , TopID AS ( SELECT TOP 1 step, COUNT(*) AS cnt FROM IDCount GROUP BY step HAVING COUNT(*) = 1 ) , MinPath AS ( SELECT DISTINCT en.step, en.idList FROM Enum en JOIN TopID t ON en.step = t.step ) SELECT m.*, e.eg, e.cost FROM MinPath m JOIN Edges e ON e.path = m.idList ORDER BY m.idList
上記は、postgreSQLなら文字列分割、文字列配列の連結が
可能なので、下記方法で正しく、閉路を確認できるかも ?
WITH Split AS (SELECT * FROM regexp_split_to_table('1,2,3#4,5,6', '#') a) , amp AS ( SELECT * FROM Split a LEFT JOIN regexp_split_to_table('3,2', ',') b ON strpos(a, b) > 0 ) -- ルートが木の連結でない場合は、差が出る SELECT (SELECT COUNT(*) FROM Amp) - (SELECT COUNT(*) FROM Split) AS diff
一応postgreSQLで、できたけど
こんなSQLでは、きびしい
動けば良いってもんでは無い
WITH RECURSIVE Nodes(n, name) AS ( SELECT 1, 'アリゲータ' UNION ALL SELECT 2, 'ブル' UNION ALL SELECT 3, 'ホワイトベア' UNION ALL SELECT 4, 'シャーク' UNION ALL SELECT 5, 'コンドル' ) , Edges(path, eg, cost) AS ( SELECT '1,2', 'アリゲータ,ブル', 1 UNION ALL SELECT '1,3', 'アリゲータ,ホワイトベア', 2 UNION ALL SELECT '2,3', 'ブル,ホワイトベア', 1 UNION ALL SELECT '2,4', 'ブル,シャーク', 3 UNION ALL SELECT '3,4', 'ホワイトベア,シャーク', 5 UNION ALL SELECT '3,5', 'ホワイトベア,コンドル', 3 UNION ALL SELECT '4,5', 'シャーク,コンドル', 5 ) , Edges2 AS ( SELECT ROW_NUMBER() OVER (ORDER BY cost) as no , SUBSTRING(e.path, 1, strpos(',', e.path)+1) AS head , SUBSTRING(e.path, strpos(',', e.path)+3, 999) AS tail , e.* FROM Edges e ) -- 例) '1#2#3#4#5' , FirstStep(trees) AS (SELECT array_to_string(ARRAY(select n from nodes), '#')) -- 種を元にSTEP毎に、最初コストの辺を付与して積上げる , Enum(prev, idList, trees, step, state) AS ( SELECT n, '', (SELECT trees FROM FirstStep), 1, '#' FROM Nodes UNION ALL SELECT prev , CASE WHEN prev IN (CAST(head AS INTEGER), CAST(tail AS INTEGER)) AND LENGTH(idList) = 0 AND diff = 0 THEN path ELSE idList END AS idList , CASE WHEN diff > 0 THEN 'NOFIX' || head || ',' || tail ||'#' || trees ELSE CASE WHEN STRPOS(trees, '#' || maxTree) > 0 THEN REPLACE(REPLACE(trees, mixTree, mixTree || ',' || maxTree), '#' || maxTree, '') WHEN STRPOS(trees, maxTree || '#') > 0 THEN REPLACE(REPLACE(trees, mixTree, mixTree || ',' || maxTree), maxTree || '#', '') ELSE trees END END AS trees , step + 1 AS step , TO_CHAR(diff, '999') FROM (SELECT T1.* , TO_NUMBER(diffMaxMin[1], '999') AS diff , diffMaxMin[2] AS mixTree , diffMaxMin[3] AS maxTree FROM (SELECT a.prev , e.head , e.tail , a.idList , a.trees , e.path , a.step , (SELECT ARRAY[TO_CHAR(diff, '999'), mixTree, maxTree] FROM (WITH Split AS (SELECT * FROM regexp_split_to_table(a.trees, '#') tree) , Amp AS ( SELECT * FROM Split TS LEFT JOIN UNNEST(ARRAY[e.head, e.tail]) ht ON strpos(TS.tree, ht) > 0 ) -- ルートが木の連結でない場合は、差が出る SELECT (SELECT COUNT(*) FROM Amp) - (SELECT COUNT(*) FROM Split) AS diff , (SELECT min(tree) FROM AMP WHERE ht is not null) AS mixTree , (SELECT MAX(tree) FROM AMP WHERE ht is not null) AS maxTree ) EndWith) AS diffMaxMin FROM Enum a JOIN Edges2 e ON a.step = e.no ) T1 ) T2 ) , IDCount AS ( SELECT step, COUNT(*) AS cnt FROM Enum WHERE 1 < step GROUP BY step, LENGTH(idList) ) -- 最適化 , TopID AS ( SELECT step, COUNT(*) AS cnt FROM IDCount GROUP BY step HAVING COUNT(*) = 1 LIMIT 1 ) , MinPath AS ( SELECT DISTINCT en.step, en.idList FROM Enum en JOIN TopID t ON en.step = t.step ) SELECT m.*, e.eg, e.cost FROM MinPath m JOIN Edges e ON e.path = m.idList ORDER BY m.idList
配列ONLYにして、整理した
だいぶ解りやすくなったと思う。
WITH RECURSIVE Nodes(n, name) AS ( SELECT 1, 'アリゲータ' UNION ALL SELECT 2, 'ブル' UNION ALL SELECT 3, 'ホワイトベア' UNION ALL SELECT 4, 'シャーク' UNION ALL SELECT 5, 'コンドル' ) , Edges(path, eg, cost) AS ( SELECT '1,2', 'アリゲータ,ブル', 1 UNION ALL SELECT '1,3', 'アリゲータ,ホワイトベア', 2 UNION ALL SELECT '2,3', 'ブル,ホワイトベア', 1 UNION ALL SELECT '2,4', 'ブル,シャーク', 3 UNION ALL SELECT '3,4', 'ホワイトベア,シャーク', 5 UNION ALL SELECT '3,5', 'ホワイトベア,コンドル', 3 UNION ALL SELECT '4,5', 'シャーク,コンドル', 5 ) , Edges2 AS ( SELECT ROW_NUMBER() OVER (ORDER BY cost) as no , SUBSTRING(e.path, 1, strpos(',', e.path)+1) AS head , SUBSTRING(e.path, strpos(',', e.path)+3, 999) AS tail , e.* FROM Edges e ) -- 例) ['1', '2', '3', '4', '5'] 単ノードの木のリスト , FirstStep(trees) AS (SELECT ARRAY(select n::text from nodes)) , Enum(prev, idList, trees, step, minTree, maxTree) AS ( -- 基底 -- Nodesを元種にする SELECT n, '', (SELECT trees FROM FirstStep), 1, 'minTree', 'maxTree' FROM Nodes UNION ALL -- 再帰部 -- STEP毎に、最初コストの辺を付与して積上げる SELECT prev -- 初回のminTree <> maxTreeの場合のみ、idListを充足 , CASE WHEN prev IN (CAST(head AS INTEGER), CAST(tail AS INTEGER)) AND LENGTH(idList) = 0 AND minTree <> maxTree THEN path ELSE idList END AS idList -- 2つの木の間の連結の場合のみ、木を連結 , CASE WHEN minTree = maxTree THEN trees ELSE -- 2つの木を連結 ARRAY_REMOVE( ARRAY_REPLACE(trees, minTree, minTree || ',' || maxTree) , maxTree) END AS trees , step + 1 AS step , minTree , maxTree FROM (SELECT T1.* , minMax[1] AS minTree , minMax[2] AS maxTree FROM (SELECT enm.prev , edg.head , edg.tail , enm.idList , enm.trees , edg.path , enm.step -- ルート(edg.head, edg.tail)が2つの木の間の連結でない場合は、MIN(tr) = MAX(tr) , (SELECT ARRAY[MIN(tr), MAX(tr)] FROM UNNEST(enm.trees) tr JOIN UNNEST(ARRAY[edg.head, edg.tail]) node ON node = ANY(string_to_array(tr, ','))) AS minMax FROM Enum enm JOIN Edges2 edg ON enm.step = edg.no ) T1 ) T2 ) -- 最適化 , MinPath AS ( SELECT DISTINCT en.step, en.idList FROM Enum en WHERE en.step = ( SELECT step FROM Enum WHERE ARRAY_LENGTH(trees, 1) = 1 ORDER BY step LIMIT 1 ) ) SELECT m.*, e.eg, e.cost FROM MinPath m JOIN Edges e ON e.path = m.idList ORDER BY m.idList
■巡回セールスマン全探索
出来損ない
WITH Input(iStart, iEnd) AS (SELECT 1, 5) , Edges(path, eg, cost) AS ( SELECT '1,2', 'チューリッヒ,ロンドン', 476 UNION ALL SELECT '2,1', 'ロンドン,チューリッヒ', 476 UNION ALL SELECT '2,3', 'ロンドン,ベルリン', 569 UNION ALL SELECT '3,2', 'ベルリン,ロンドン', 569 UNION ALL SELECT '3,1', 'ベルリン,チューリッヒ', 408 UNION ALL SELECT '1,3', 'チューリッヒ,ベルリン', 408 UNION ALL SELECT '4,3', 'ローマ,ベルリン', 736 UNION ALL SELECT '3,4', 'ベルリン,ローマ', 736 UNION ALL SELECT '4,1', 'ローマ,チューリッヒ', 434 UNION ALL SELECT '1,4', 'チューリッヒ,ローマ', 434 UNION ALL SELECT '4,2', 'ローマ,ロンドン', 894 UNION ALL SELECT '2,4', 'ロンドン,ローマ', 894 UNION ALL SELECT '5,4', 'マドリッド,ローマ', 852 UNION ALL SELECT '4,5', 'ローマ,マドリッド', 852 UNION ALL SELECT '5,1', 'マドリッド,チューリッヒ', 774 UNION ALL SELECT '1,5', 'チューリッヒ,マドリッド', 774 UNION ALL SELECT '5,2', 'マドリッド,ロンドン', 784 UNION ALL SELECT '2,5', 'ロンドン,マドリッド', 784 UNION ALL SELECT '5,3', 'マドリッド,ベルリン', 1154 UNION ALL SELECT '3,5', 'ベルリン,マドリッド', 1154 ) , NaturalNumber(n) AS ( SELECT iStart FROM Input UNION ALL SELECT n + 1 FROM NaturalNumber INNER JOIN Input ON NaturalNumber.n + 1 <= Input.iEnd ) , Enum(id, idList, pList) AS ( SELECT n, CAST(n AS VARCHAR(MAX)), CAST('チューリッヒ' AS VARCHAR(MAX)) FROM NaturalNumber UNION ALL SELECT b.n , CAST(a.idList + ',' + CAST(b.n AS VARCHAR(MAX)) AS VARCHAR(MAX)) , a.pList + ',' + (SELECT SUBSTRING(e.eg, CHARINDEX(',', e.eg)+1, 999) FROM Edges e WHERE e.path = CAST(a.id AS VARCHAR(MAX)) + ',' + CAST(b.n AS VARCHAR(MAX))) FROM Enum a JOIN NaturalNumber b ON CHARINDEX(CAST(b.n AS VARCHAR(MAX)), a.idList) = 0 ) , Domain(idList, pList) AS ( SELECT idList + ',1', pList + ',{*}' FROM Enum WHERE LEN(idList) = LEN('1,2,3,4,5') AND CHARINDEX('1', idList) = 1 ) SELECT c.idList, c.pList, SUM(cost) AS cost FROM Domain c LEFT JOIN Edges e ON CHARINDEX(e.path, c.idList) > 0 GROUP BY c.idList, c.pList ORDER BY SUM(cost)
■列挙
ノード数決め打ちなら、JOINでつなぐ
WITH Nodes(id, name) AS ( SELECT 1, '一郎' UNION ALL SELECT 2, '二郎' UNION ALL SELECT 3, '三郎' UNION ALL SELECT 4, '四郎' UNION ALL SELECT 5, '四郎' ) SELECT t1.name A,t2.name B FROM Nodes t1 JOIN Nodes t2 ON t1.id < t2.id ORDER BY t1.id, t2.id